Esempio n. 1
0
 def __init__(self, value=0x0000):
     """Build the address map for this memory image.

     Every even slot is seeded with the high byte of *value* and every
     odd slot with the low byte; two fixed tail entries are then added
     (0xff at MAX_ADDR - 1 and 0xfc at MAX_ADDR).
     """
     valid_value(value, MIN_VALUE, MAX_VALUE)
     # Split the 16-bit word once; divmod(value, 0x100) is by definition
     # (value // 0x100, value - (value // 0x100) * 0x100).
     high_byte, low_byte = divmod(value, 0x100)
     entries = []
     for pair in range(MAX_ADDR // 2):
         entries.append(address.address(2 * pair, high_byte))
         entries.append(address.address(2 * pair + 1, low_byte))
     # Fixed trailing entries; note MAX_ADDR - 1 ends up in the map twice,
     # matching the original behaviour.
     entries.append(address.address(MAX_ADDR - 1, 0xff))
     entries.append(address.address(MAX_ADDR, 0xfc))
     self.map = entries
Esempio n. 2
0
 def loadModule(self, moduleName):
     """Import one of the known bot modules by name and register it.

     Only whitelisted module names are accepted.  Each module exposes a
     factory (a class named after the module) which is instantiated with
     this object and stored in ``self.moduleList``.

     Returns ``True`` on success, ``False`` when the name is unknown or
     the import fails.
     """
     import importlib

     # Whitelist of loadable modules (replaces six duplicated
     # import/instantiate branches in the original).
     known = ('nospam', 'noflood', 'address', 'akick', 'nobots',
              'noinsult')
     if moduleName not in known:
         # Log message kept verbatim (Spanish, as in the original).
         self.logger.log(
             0, 'DEBUG',
             u'[Función loadModule]: El módulo %s no existe.' %
             moduleName)
         return False
     try:
         module = importlib.import_module(moduleName)
         # Each module defines a class with the same name as the module.
         modObject = getattr(module, moduleName)(self)
     except ImportError:
         return False
     self.moduleList[moduleName] = modObject
     return True
Esempio n. 3
0
    def loadModule(self, moduleName):
        """Import one of the known bot modules by name and register it in
        ``self.moduleList``.

        Returns ``True`` on success, ``False`` when the name is unknown or
        the import fails.

        NOTE: the original block mixed tabs and spaces inconsistently,
        which is a TabError/IndentationError in Python 3; indentation has
        been normalised to 4 spaces without changing the logic.
        """
        try:
            if moduleName == 'nospam':
                import nospam
                modObject = nospam.nospam(self)
            elif moduleName == 'noflood':
                import noflood
                modObject = noflood.noflood(self)
            elif moduleName == 'address':
                import address
                modObject = address.address(self)
            elif moduleName == 'akick':
                import akick
                modObject = akick.akick(self)
            elif moduleName == 'nobots':
                import nobots
                modObject = nobots.nobots(self)
            elif moduleName == 'noinsult':
                import noinsult
                modObject = noinsult.noinsult(self)
            else:
                # Unknown module: log (message kept verbatim) and bail out.
                self.logger.log(0, 'DEBUG', u'[Función loadModule]: El módulo %s no existe.' % moduleName)
                return False
        except ImportError:
            return False
        self.moduleList[moduleName] = modObject
        return True
 def execute(message):
     """Decode one client message and dispatch it to this bucket.

     Recognised commands: INSERT, QUERY, POPULATION, REHASH, SHOW.
     Keyed commands (INSERT/QUERY) are forwarded when the key hashes to
     another bucket.  Returns the textual reply for the client.
     """
     # BUG FIX: str.strip() returns a new string; the original discarded
     # the result, leaving any surrounding whitespace in msg.
     msg = message.decode("utf-8").strip()
     lista = msg.split()
     print(lista)
     if not lista:
         # Empty/whitespace-only message: nothing to dispatch.
         return "NOPE"
     command = lista[0].upper()
     # Keyed commands may belong to another bucket; forward if so.
     if len(lista) > 1 and command in ("INSERT", "QUERY"):
         key = int(lista[1])
         location = address(key, Bucket.fs)
         if location != Bucket.bucketNbr:
             return Bucket.forward(location, msg)
     try:
         if command == "INSERT":
             # Guard against missing arguments (consistent with the
             # coordinator-side execute()).
             if len(lista) < 3:
                 return "Invalid Command."
             response = Bucket.insert(int(lista[1]), lista[2])
             if len(lista) > 3 and lista[3] == "FWD":
                 response = "IAM {}".format(Bucket.fs.extent)
             return response
         elif command == "QUERY":
             if len(lista) < 2:
                 return "Invalid Command."
             return Bucket.query(int(lista[1]))
         elif command == "POPULATION":
             Bucket.population(lista)
             return "ACK"
         elif command == "REHASH":
             if len(lista) < 2:
                 return "Invalid Command."
             Bucket.fs = FileState(int(lista[1]))
             Bucket.rehash(Bucket.fs)
             return "ACK"
         elif command == "SHOW":
             return Bucket.show()
         else:   # TODO: unknown command handling
             return "NOPE"
     except KeyError:
         return "key error"
Esempio n. 5
0
 def __init__(self):
     """Populate this person record with the hard-coded sample data."""
     # Occupation is not known for the sample person.
     self.occupation = None
     # Basic identity fields.
     self.firstName = "Yan"
     self.lastName = "Carvalho Borges"
     # Date of birth as a dd/mm/yyyy string, plus gender.
     self.birth = "21/03/1997"
     self.gender = "Male"
     # Home address: country, city, street, number.
     self.address = address(
         'Brasil', 'Cruzeiro', 'Rua Maria José Tabaco', '170')
Esempio n. 6
0
def main():
    """Exercise the contact and address classes.

    Builds two contacts with an address, two emails and a phone number
    each, prints them, then demonstrates the dynamic addField() API.
    """
    # First sample address: last line (city/state/zip), delivery line,
    # and secondary unit.
    last = "Trentwood OR 94701"
    delivery = "1402 SW Alder st."
    second = "APT 11"
    testAddress = address(last, delivery, second)
    testRecipient = "John Doe"

    # Second sample address and recipient.
    last2 = "Oakland CA 94501"
    delivery2 = "1244 Broadway st."
    second2 = "APT 11"
    testAddress2 = address(last2, delivery2, second2)
    testRecipient2 = "Lucas Rondenet"

    testContact = contact(testRecipient)
    testContact2 = contact(testRecipient2)

    testEmail = "*****@*****.**"
    testEmail2 = "*****@*****.**"
    testPhoneNumber = "542-345-6745"
    testEmail3 = "*****@*****.**"
    testEmail4 = "*****@*****.**"
    testPhoneNumber2 = "545-565-7889"

    # Attach the address, emails and phone number to each contact.
    testContact.addAddress(testAddress)
    testContact.addEmail(testEmail)
    testContact.addEmail(testEmail2)
    testContact.addPhoneNumber(testPhoneNumber)
    testContact2.addAddress(testAddress2)
    testContact2.addEmail(testEmail3)
    testContact2.addEmail(testEmail4)
    testContact2.addPhoneNumber(testPhoneNumber2)

    print(testContact)
    #testContact.removeAddress(testAddress)
    print(testContact)
    # BUG FIX: a bare `print` is a no-op expression in Python 3 (the
    # function object is evaluated and discarded); call it to emit the
    # intended blank line.
    print()
    print(testContact2)

    # Demonstrate adding an arbitrary field at runtime.
    testContact.addField("age", "32")
    testContact3 = contact("John Newhall")
    print(testContact.age)
    print(vars(testContact))
    print(vars(testContact2))
    print(vars(testContact3))
Esempio n. 7
0
 def setUp(self):
     """Lazily create the address service and cache the 'dev' address list.

     Both the service and the list are created only once and reused by
     subsequent calls.
     """
     from address import address
     if self.adr_svc is None:
         self.adr_svc = address()
     # BUG FIX: the original used `len(self.adrs) is 0`, which tests
     # object identity on an int (implementation-dependent and a
     # SyntaxWarning in modern Python); use container truthiness instead.
     if not self.adrs:
         self.adrs = self.adr_svc.list_addresses({
             'address': None,
             'stage': 'dev'
         })
Esempio n. 8
0
 def get_addresses(self):
     """Return the practice locations from the first lookup result.

     Filters the first result's address list down to entries whose
     ``address_purpose`` is "LOCATION", wrapping each in an ``address``
     object.  Returns ``None`` (as before) when the payload is missing
     or malformed.
     """
     try:
         addresses = self._data["results"][0]["addresses"]
         valid_addresses = []
         for add in addresses:
             if add["address_purpose"] == "LOCATION":
                 valid_addresses.append(address(add))
         return valid_addresses
     except (KeyError, IndexError, TypeError):
         # BUG FIX: the original bare `except: pass` swallowed every
         # exception (including programming errors); catch only the
         # malformed-payload cases and keep the best-effort None result.
         return None
Esempio n. 9
0
 def addressBookImport(self, fileName, app):
     """Import contacts from *fileName* into this address book.

     Each parsed element must carry 'Recipient', 'Last', 'Delivery' and
     'Second' keys; 'Phone' and 'Email' are optional.  The GUI listbox
     (*app*) is refreshed after every contact added.  Does nothing when
     the file does not exist.
     """
     if os.path.exists(fileName):
         # BUG FIX: the original opened the file and never closed it;
         # use a context manager so the handle is released even on error.
         with open(fileName, 'r') as f:
             data = utils.importParse(f)
             for element in data:
                 newContact = contact(element['Recipient'])
                 newAddress = address(element['Last'], element['Delivery'], element['Second'])
                 newContact.addAddress(newAddress)
                 if 'Phone' in element:
                     newContact.addPhoneNumber(element['Phone'])
                 if 'Email' in element:
                     newContact.addEmail(element['Email'])
                 self.addContact(newContact)
                 app.cmdUpdateListbox(self.contacts)
Esempio n. 10
0
def main():
    """Smoke-test the address class: build two sample addresses and print
    their parsed components."""
    # First test address: last line (city/state/zip), delivery line,
    # and secondary unit.
    last = "Trentwood OR 94701"
    delivery = "1402 SW Alder st."
    second = "APT 11"
    testAddress = address(last, delivery, second)
    testRecipient = "John Doe"

    last2 = "Oakland CA 94501"
    delivery2 = "1244 Broadway st."
    second2 = "APT 11"
    testAddress2 = address(last2, delivery2, second2)
    testRecipient2 = "Lucas Rondenet"
Esempio n. 11
0
    def cmdAdd(self):
        """Create a contact from the entry widgets and add it to the book.

        Reads the name, address, email and phone entry fields, builds a
        contact carrying one address, registers it with the logic layer,
        refreshes the listbox and marks the address book as having
        unsaved changes.
        """
        # FIX: the original used bare string literals between statements as
        # "comments"; those are real no-op expression statements, replaced
        # here with proper comments.
        # Create a new contact based on the contact's name.
        self.tempContact = contact.contact(self.entryName.get())
        # Build a temp address from the three address entry fields.
        self.tempAddress = address.address(self.entryAddressLast.get(), self.entryAddressDelivery.get(), self.entryAddressSecond.get())
        # Add the address to the contact object.
        self.tempContact.addAddress(self.tempAddress)
        # Add the email address to the contact object.
        self.tempContact.addEmail(self.entryEmail.get())
        # Get and add the phone number to the contact object.
        self.tempContact.addPhoneNumber(self.entryPhone.get())
        # Add the contact to the address book.
        self.logic.addContact(self.tempContact)
        # Update the listbox.
        self.cmdUpdateListbox(self.logic.contacts)

        # Set the addressbook state to unsaved.
        self.unSavedChanges = 1
Esempio n. 12
0
def match_algo(nric, req_location, date, startTime, endTime, req_num_kids, req_pay_amt):
    """Score and rank candidate users for a care request.

    Builds a weekday -> ['start-end', ...] availability map from the
    requested dates/times, then scores every user (except the requester
    *nric*) as 0.5*time-match + 0.3*distance + 0.2*pay and prints the
    candidates sorted by score, best first.

    NOTE(review): assumes each entry of *date* is 'YYYY-MM-DD' and that
    date/startTime/endTime are parallel lists of equal length — confirm
    with the caller.
    """
    cs = CSV_reader()
    cs.read_file()
    weighted_users = []

    # Weekday key is '1'..'7' (Monday == '1').
    time_date = {}
    for day_str, start, end in zip(date, startTime, endTime):
        # BUG FIX: map() returns an iterator in Python 3 and cannot be
        # indexed; unpack the parts explicitly instead.
        year, month, day = [int(part) for part in day_str.split('-')]
        day_of_week = str(datetime.date(year, month, day).weekday() + 1)
        time_date.setdefault(day_of_week, []).append(start + '-' + end)

    # Hoisted out of the loop: invariant across users.
    req_pay_amt = float(req_pay_amt)
    for u in cs.users:
        if u.id == nric:
            # Never match the requester with themselves.
            continue
        num_kids = numOfKids(req_num_kids, u.n_kids)
        pay_amt = payment(req_pay_amt, u.min_amt)
        time_matched = timeMatching(time_date, u.time_available)
        distance = address(u.location, req_location)

        # BUG FIX: the original `time_matched==0 | num_kids==0` applied
        # bitwise OR before the comparisons; a plain boolean `or` is what
        # was intended.
        if time_matched == 0 or num_kids == 0:
            continue

        # Weighted score: 50% time overlap, 30% distance, 20% pay.
        score = 0.5*float(time_matched) + 0.3*float(distance) + 0.2*float(pay_amt)
        weighted_users.append([u.id, score])

    sorted_weighted_users = sorted(weighted_users, key=itemgetter(1), reverse=True)
    print(sorted_weighted_users)
 def rehash(fs):
     """Redistribute this bucket's keys after the file state changed.

     For every key that no longer hashes to this bucket, the key/value
     pair is sent (INSERT) to the bucket it now belongs to; the bucket
     table is refreshed from the coordinator (POPULATE) when the target
     is unknown.  Keys acknowledged by their new owner are removed from
     the local dictionary afterwards.
     """
     print("Rehashing")
     print(fs)
     # Keys successfully moved; deleted after the loop so the dict is not
     # mutated while being iterated.
     deleteList = []
     for key in Bucket.dicc:
         location = address(key, fs)
         if location == Bucket.bucketNbr:
             print("No need to rehash for key {}".format(key))
             continue
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         if location not in Bucket.bucketList:
             # Destination bucket unknown: ask the coordinator for the
             # current bucket population before forwarding.
             try:
                 # Connect to the coordinator and request the table.
                 sock.connect((Bucket.coHost, Bucket.coPort))
                 data = "POPULATE"
                 sock.sendall(bytes(data + "\n", "utf-8"))
                 # Receive the coordinator's reply.
                 received = str(sock.recv(1024), "utf-8")
             finally:
                 # Close the connection even on error.
                 sock.close()
             # Process the reply: "POPULATION k v k v ...".
             reply = received.split()
             if reply[0] == "POPULATION":
                 Bucket.population(reply)
         # Bucket table entries are "host port" strings.
         destAddress = Bucket.bucketList[location].split()
         destHost, destPort = destAddress[0], int(destAddress[1])
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         try:
             sock.connect((destHost, destPort))
             data = "INSERT {} {}".format(key, Bucket.dicc[key])
             sock.sendall(bytes(data + "\n", "utf-8"))
             received = str(sock.recv(1024), "utf-8")
             # NOTE(review): this exact-equality check presumably relies on
             # the peer sending "ACK" with no trailing newline — confirm.
             if received == "ACK":
                 deleteList.append(key)
         finally:
             # Close the connection even on error.
             sock.close()
         print(received)
     for key in deleteList:
         Bucket.dicc.pop(key)
Esempio n. 14
0
def main():
    """Smoke-test contact + address: build one contact with an address,
    two emails and a phone number, then print it and its mailing format."""
    testAddressBook = addressbook()

    testContact1 = contact("John Doe")
    # Address parts: last line (city/state/zip), delivery line, and
    # secondary unit (empty here).
    testLast1 = "San Diego CA 94501"
    testDelivery1 = "1401 SW Main St."
    testSecond1 = ""
    testAddr1 = address(testLast1, testDelivery1, testSecond1)
    testEmail1 = "*****@*****.**"
    testEmail2 = "*****@*****.**"
    testPhoneNumber1 = "542-345-6745"
    testContact1.addAddress(testAddr1)
    testContact1.addEmail(testEmail1)
    testContact1.addEmail(testEmail2)
    testContact1.addPhoneNumber(testPhoneNumber1)
    print(testContact1)
    print(testContact1.mailingFormat())
    #print(testContact1.city)
    #print(testContact1.state)
    #print(testContact1.zip)

    # NOTE(review): the triple-quoted block below is unterminated in this
    # snippet — the rest of the commented-out test appears to be cut off.
    '''testContact2 = contact("Mary Sue")
 def execute(message):
     """Decode one coordinator message and dispatch the command.

     Supported commands: INSERT, QUERY (forwarded to the owning bucket
     when the key hashes elsewhere), REGISTER, POPULATE, SPLIT, SHOW.
     Returns the textual reply to send back to the client.
     """
     # BUG FIX: str.strip() returns a new string; the original discarded
     # the result, leaving any surrounding whitespace in msg.
     msg = message.decode("utf-8").strip()
     lista = msg.split()
     print(lista)

     if not lista:
         # Empty/whitespace-only message: nothing to dispatch.
         return "NOPE"
     command = lista[0].upper()
     # Keyed commands may belong to another bucket; forward if so.
     if len(lista) > 1 and command in ("INSERT", "QUERY"):
         key = int(lista[1])
         location = address(key, Bucket.fs)
         if location != Bucket.bucketNbr:
             return Bucket.forward(location, msg)
     try:
         if command == "INSERT":
             if len(lista) < 3:
                 return "Invalid Command."
             return Coordinator.insert(int(lista[1]), lista[2])
         elif command == "QUERY":
             if len(lista) < 2:
                 return "Invalid Command."
             return Bucket.query(int(lista[1]))
         elif command == "REGISTER":
             # Robustness: REGISTER needs host and port arguments.
             if len(lista) < 3:
                 return "Invalid Command."
             # Assign the next bucket number and record "host port".
             bucketNbr = Coordinator.totalBuckets
             Coordinator.totalBuckets += 1
             Bucket.bucketList[bucketNbr] = "{0} {1}".format(lista[1], lista[2])
             print(Bucket.bucketList)
             return "{}".format(bucketNbr)
         elif command == "POPULATE":
             # Reply with the full bucket table: "POPULATION k v k v ...".
             return "POPULATION " + ' '.join("{} {}".format(k, v) for k, v in Bucket.bucketList.items())
         elif command == "SPLIT":
             return Coordinator.split()
         elif command == "SHOW":
             return Coordinator.show()
         else:
             return "NOPE"
     except KeyError:
         return "key error"
Esempio n. 16
0
    def parse(self, xml_string, counters, input_file_name, curs):
        url = '{http://clarivate.com/schema/wok5.27/public/FullRecord}'

        try:
            root = ET.fromstring(xml_string)
            for REC in root:
                # parse publications and create a publication object containing all the attributes of publication
                new_pub = pub.publication()
                # old method commented
                # r_publication = dict()
                # Couter class to generate surrogate ids temporary for now later they will be replaced by auto incremental collumns in data base

                counters.r_publication_seq += 1
                new_pub.id = counters.r_publication_seq
                # Finding UID in the xml by finding the UID tag inside a record
                new_pub.source_id = REC.find(url + 'UID').text

                pub_info = REC.find('.//' + url + 'pub_info')
                new_pub.source_type = pub_info.get('pubtype')

                source_title = REC.find('.//' + url + "title[@type='source']")

                if source_title is not None:
                    if source_title.text is not None:
                        new_pub.source_title = source_title.text.encode(
                            'utf-8')
                # extracting values from properties of pub_info tag in XMl
                new_pub.has_abstract = pub_info.get('has_abstract')
                new_pub.publication_year = pub_info.get('pubyear')
                new_pub.issue = pub_info.get('issue')
                new_pub.volume = pub_info.get('vol')
                new_pub.pubmonth = pub_info.get('pubmonth')
                new_pub.publication_date = pub_info.get('sortdate')
                new_pub.coverdate = pub_info.get('coverdate')

                page_info = pub_info.find(url + 'page')
                if page_info is not None:
                    new_pub.begin_page = page_info.get('begin')
                    new_pub.end_page = page_info.get('end')

                document_title = REC.find('.//' + url + "title[@type='item']")
                if document_title is not None:
                    if document_title.text is not None:
                        new_pub.document_title = document_title.text. \
                            encode('utf-8')

                document_type = REC.find('.//' + url + 'doctype')
                if document_type is not None:
                    if document_type.text is not None:
                        new_pub.document_type = document_type.text

                publisher_name = REC.find('.//' + url +
                                          "name[@role='publisher']")
                if publisher_name is not None:
                    pub_name = publisher_name.find('.//' + url + 'full_name')
                    if pub_name is not None:
                        if pub_name.text is not None:
                            new_pub.publisher_name = pub_name.text. \
                                encode('utf-8')

                pub_address_no = REC.find('.//' + url +
                                          "address_spec[@addr_no='1']")
                if pub_address_no is not None:
                    publisher_address = pub_address_no.find('.//' + url +
                                                            'full_address')
                    if publisher_address is not None:
                        if publisher_address.text is not None:
                            new_pub.publisher_address = publisher_address.text. \
                                encode('utf-8')

                # r_publication['language'] = ''
                languages = REC.find('.//' + url + 'languages')
                if languages is not None:
                    language = languages.find('.//' + url + 'language')
                    if language is not None:
                        if language.text is not None:
                            new_pub.language = language.text.encode('utf-8')

                new_pub.edition = REC.find('.//' + url +
                                           'edition').get('value')
                new_pub.source_filename = input_file_name
                new_pub.created_date = datetime.date.today()
                new_pub.last_modified_date = datetime.date.today()
                ## query to insert a publication record into the publications table in the database
                ## The query may be written into a saperate file in future from where it is read in the form of a string ammended values and executed to make code look better
                curs.execute(
                    "INSERT INTO wos_publications(begin_page, created_date, document_title, document_type,edition, end_page,has_abstract,id,issue,language,last_modified_date,publication_date,publication_year,publisher_address,publisher_name,source_filename,source_id,source_title,source_type,volume)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id) DO UPDATE SET begin_page = excluded.begin_page, created_date = excluded.created_date,document_title = excluded.document_title, document_type = excluded.document_type, edition = excluded.edition,end_page = excluded.end_page, has_abstract = excluded.has_abstract, id = excluded.id, issue = excluded.issue,language = excluded.language, last_modified_date = excluded.last_modified_date,publication_date = excluded.publication_date, publication_year = excluded.publication_year,publisher_address = excluded.publisher_address, publisher_name = excluded.publisher_name,source_filename = excluded.source_filename, source_id = excluded.source_id, source_title = excluded.source_title,source_type = excluded.source_type, volume = excluded.volume;",
                    (str(new_pub.begin_page), new_pub.created_date,
                     str(new_pub.document_title), str(new_pub.document_type),
                     str(new_pub.edition), str(new_pub.end_page),
                     str(new_pub.has_abstract), str(
                         new_pub.id), str(new_pub.issue), str(
                             new_pub.language), new_pub.last_modified_date,
                     new_pub.publication_date, str(new_pub.publication_year),
                     str(new_pub.publisher_address), str(
                         new_pub.publisher_name), str(new_pub.source_filename),
                     str(new_pub.source_id), str(new_pub.source_title),
                     new_pub.source_type, str(new_pub.volume)))
                ##old code for writing the publications data into a CSV file
                '''writer_pub.writerow((r_publication['id'], r_publication['source_id'], \
                                     r_publication['source_type'], r_publication['source_title'], \
                                     r_publication['language'], r_publication['document_title'], \
                                     r_publication['document_type'], r_publication['has_abstract'], \
                                     r_publication['issue'], r_publication['volume'], \
                                     r_publication['begin_page'], r_publication['end_page'], \
                                     r_publication['publisher_name'], r_publication['publisher_address'], \
                                     r_publication['publication_year'], r_publication['publication_date'], \
                                     r_publication['created_date'], r_publication['last_modified_date'], \
                                     r_publication['edition'], r_publication['source_filename']))'''
                # parse grants in funding acknowledgements for each publication
                # old method of creating a dict type
                # r_grant = dict( )

                # New method of creating an object to store everything in the form of proper objects which could be developed into classes having their own properties in future
                r_grant = grant.grant()
                r_grant.source_id = new_pub.source_id

                # r_grant.funding_ack = ''
                FUNDING_ACK = REC.find('.//' + url + 'fund_text')

                if FUNDING_ACK is not None:  # if funding acknowledgement exists, then extract the r_grant(s) data
                    funding_ack_p = FUNDING_ACK.find('.//' + url + 'p')
                    if funding_ack_p is not None:
                        if funding_ack_p.text is not None:
                            r_grant.funding_ack = funding_ack_p.text.encode(
                                'utf-8')
                # looping through all the r_grant tags
                for l_grant in REC.findall('.//' + url + 'grant'):
                    # r_grant.grant_agency = ''
                    grant_agency = l_grant.find('.//' + url + 'grant_agency')
                    if grant_agency is not None:
                        if grant_agency.text is not None:
                            r_grant.grant_agency = grant_agency.text.encode(
                                'utf-8')

                    grant_ids = l_grant.find('.//' + url + 'grant_ids')
                    if grant_ids is not None:
                        for grant_id in grant_ids.findall('.//' + url +
                                                          'grant_id'):
                            counters.r_grant_seq = counters.r_grant_seq + 1
                            r_grant.id = counters.r_grant_seq
                            # r_grant.grant_number = ''
                            if grant_id is not None:
                                if grant_id.text is not None:
                                    r_grant.grant_number = grant_id.text.encode(
                                        'utf-8')
                            if r_grant.funding_ack is not None:
                                # insert the grant details in the grants table if there is any funding acknowledgement in the records
                                curs.execute(
                                    "INSERT INTO wos_grants(id,source_id,grant_number,grant_organization,funding_ack,source_filename)VALUES(%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id, grant_number, grant_organization) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, grant_number = excluded.grant_number,grant_organization = excluded.grant_organization, funding_ack = excluded.funding_ack,source_filename = excluded.source_filename;",
                                    (str(r_grant.id), str(r_grant.source_id),
                                     str(r_grant.grant_number),
                                     str(r_grant.grant_agency),
                                     str(r_grant.funding_ack),
                                     str(new_pub.source_filename)))
                                '''writer_grant.writerow((r_grant['id'],r_grant['source_id'],\
                                r_grant['grant_number'],r_grant['grant_agency'],\
                                r_grant['funding_ack'],\
                                r_publication['source_filename']))'''
                # insert code to insert record in r_grant table
                # parse document object identifiers for each publication
                r_dois = dois.dois()
                r_dois.source_id = new_pub.source_id

                IDS = REC.find('.//' + url + 'identifiers')
                if IDS is not None:
                    for identifier in IDS.findall('.//' + url + 'identifier'):
                        # r_dois['doi'] = None
                        id_value = identifier.get('value')
                        if id_value is not None:
                            r_dois.doi = id_value.encode('utf-8')
                        # r_dois['doi_type'] = ''
                        id_type = identifier.get('type')
                        if id_type is not None:
                            r_dois.doi_type = id_type.encode('utf-8')
                        # write each doi to CSV file for wos_document_identifiers table
                        if r_dois.doi is not None:
                            counters.r_doi_seq = counters.r_doi_seq + 1
                            r_dois.id = counters.r_doi_seq
                            # insering records into wos_document_identifier table
                            curs.execute(
                                "INSERT INTO wos_document_identifiers(id,source_id,document_id,document_id_type,source_filename)VALUES(%s,%s,%s,%s,%s) ON CONFLICT (source_id, document_id_type, document_id) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, document_id = excluded.document_id,document_id_type = excluded.document_id_type, source_filename = excluded.source_filename;",
                                (str(r_dois.id), str(r_dois.source_id),
                                 str(r_dois.doi), str(r_dois.doi_type),
                                 str(new_pub.source_filename)))
                            '''writer_dois.writerow((r_dois['id'], r_dois['source_id'], \
                                                  r_dois['doi'], r_dois['doi_type'], \
                                                  r_publication['source_filename']))'''

                # parse keyword for each publication
                keywords = REC.find('.//' + url + 'keywords_plus')
                if keywords is not None:
                    r_keyword = key_word.keyword()
                    r_keyword.source_id = new_pub.source_id
                    for keyword in keywords.findall('.//' + url + 'keyword'):
                        if keyword is not None:
                            if keyword.text is not None:
                                r_keyword.keyword = keyword.text.encode(
                                    'utf-8')
                                counters.r_keyword_seq = counters.r_keyword_seq + 1
                                r_keyword.id = counters.r_keyword_seq
                                # inserting records in wos_keywords
                                curs.execute(
                                    "INSERT INTO wos_keywords(id,source_id,keyword,source_filename)VALUES(%s,%s,%s,%s)ON CONFLICT (source_id, keyword) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, keyword = excluded.keyword,source_filename = excluded.source_filename;",
                                    (str(r_keyword.id), str(
                                        r_keyword.source_id),
                                     str(r_keyword.keyword),
                                     str(new_pub.source_filename)))
                                # old code for insering data into a text file
                                ''''writer_keyword.writerow((r_keyword['id'], \
                                                         r_keyword['source_id'], r_keyword['keyword'], \
                                                         r_publication['source_filename']))''' ''

                # parse abstract for each publication
                if new_pub.has_abstract == 'Y':
                    abstracts = REC.find('.//' + url + 'abstracts')
                    if abstracts is not None:
                        r_abst = abst.abstract()
                        r_abst.source_id = new_pub.source_id
                        r_abstract_text = ''
                        for abstract_text in abstracts.findall('.//' + url +
                                                               'p'):
                            if abstract_text is not None:
                                if abstract_text.text is not None:
                                    if r_abstract_text:
                                        r_abstract_text = r_abstract_text.join(
                                            '\n\n')
                                    r_abstract_text = r_abstract_text + abstract_text.text.encode(
                                        'utf-8')
                        # adding all the abstract paragraphs into one before writing it into the database
                        r_abst.abstract_text = r_abstract_text

                        # old code
                        # r_abst['abstract_text'] = abstract_text.text.\
                        #                           encode('utf-8')
                        # r_abstract_seq +=1
                        # r_abst['id'] = r_abstract_seq
                        # writer_abstract.writerow((r_abst['id'],\
                        #     r_abst['source_id'],r_abst['abstract_text'],\
                        #     r_publication['source_filename']))
                        # writing the abstracts record into the data base
                        curs.execute(
                            "INSERT INTO wos_abstracts(source_id,abstract_text,source_filename)VALUES(%s,%s,%s) ON CONFLICT (source_id) DO UPDATE SET source_id = excluded.source_id, abstract_text = excluded.abstract_text, source_filename = excluded.source_filename;;",
                            (str(r_abst.source_id), str(r_abst.abstract_text),
                             str(new_pub.source_filename)))
                        '''writer_abstract.writerow(
                            (r_abst['source_id'], r_abst['abstract_text'], r_publication['source_filename']))'''

                # parse addresses for each publication

                r_addr = add.address()
                # r_addr.id = {}
                # r_addr.source_id = {}
                # r_addr['addr_name'] = {}
                # r_addr['organization'] = {}
                # r_addr['suborganization'] = {}
                # r_addr['city'] = {}
                # r_addr['country'] = {}
                # r_addr['zip'] = {}
                addr_no_list = []
                addresses = REC.find('.//' + url + 'addresses')
                for addr in addresses.findall('.//' + url + 'address_spec'):

                    addr_ind = addr.get('addr_no')
                    if addr_ind is None:
                        addr_ind = 0
                    else:
                        addr_ind = int(addr_ind)
                        # Kepp all addr_no for the following reference by authors
                        addr_no_list.append(int(addr_ind))

                    r_addr.source_id[addr_ind] = new_pub.source_id
                    r_addr.addr_name[addr_ind] = ''
                    addr_name = addr.find('.//' + url + 'full_address')
                    if addr_name is not None:
                        if addr_name.text is not None:
                            r_addr.addr_name[addr_ind] = addr_name.text.encode(
                                'utf-8')
                    r_addr.organization[addr_ind] = ''
                    organization = addr.find('.//' + url +
                                             "organization[@pref='Y']")
                    if organization is not None:
                        if organization.text is not None:
                            r_addr.organization[addr_ind] = organization.text. \
                                encode('utf-8')
                    r_addr.sub_organization[addr_ind] = ''
                    suborganization = addr.find('.//' + url +
                                                'suborganization')
                    if suborganization is not None:
                        if suborganization.text is not None:
                            r_addr.sub_organization[addr_ind] = suborganization.text. \
                                encode('utf-8')
                    r_addr.city[addr_ind] = ''
                    city = addr.find('.//' + url + 'city')
                    if city is not None:
                        if city.text is not None:
                            r_addr.city[addr_ind] = city.text.encode('utf-8')
                    r_addr.country[addr_ind] = ''
                    country = addr.find('.//' + url + 'country')
                    if country is not None:
                        if country.text is not None:
                            r_addr.country[addr_ind] = country.text.encode(
                                'utf-8')
                    r_addr.zip_code[addr_ind] = ''
                    addr_zip = addr.find('.//' + url + 'zip')
                    if addr_zip is not None:
                        if addr_zip.text is not None:
                            r_addr.zip_code[addr_ind] = addr_zip.text.encode(
                                'utf-8')
                    if r_addr.addr_name[addr_ind] is not None:
                        counters.r_addr_seq += 1
                        r_addr.id[addr_ind] = counters.r_addr_seq
                        # Inserting address records into the database
                        curs.execute(
                            "INSERT INTO wos_addresses(id,source_id,address_name,organization,sub_organization,city,country,zip_code,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT (source_id, address_name) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, address_name = excluded.address_name,organization = excluded.organization, sub_organization = excluded.sub_organization, city = excluded.city,country = excluded.country, zip_code = excluded.zip_code, source_filename = excluded.source_filename;",
                            (str(r_addr.id[addr_ind]),
                             str(r_addr.source_id[addr_ind]),
                             str(r_addr.addr_name[addr_ind]),
                             str(r_addr.organization[addr_ind]),
                             str(r_addr.sub_organization[addr_ind]),
                             str(r_addr.city[addr_ind]),
                             str(r_addr.country[addr_ind]),
                             str(r_addr.zip_code[addr_ind]),
                             str(new_pub.source_filename)))
                        '''writer_address.writerow((r_addr['id'][addr_ind], \
                                                 r_addr['source_id'][addr_ind], r_addr['addr_name'][addr_ind], \
                                                 r_addr['organization'][addr_ind], \
                                                 r_addr['suborganization'][addr_ind], r_addr['city'][addr_ind], \
                                                 r_addr['country'][addr_ind], r_addr['zip'][addr_ind], \
                                                 r_publication['source_filename']))'''

                # parse titles for each publication
                r_title = ti.title()
                r_title.source_id = new_pub.source_id
                r_title.id = counters.r_title_seq

                summary = REC.find('.//' + url + 'summary')
                if summary is not None:
                    titles = summary.find('.//' + url + 'titles')
                    if titles is not None:
                        for title in titles.findall('.//' + url + 'title'):
                            if title is not None:
                                if title.text is not None:
                                    r_title.title = title.text.encode('utf-8')
                                    r_title.type = title.get('type')
                                    r_title.id += 1
                                    # inserting titles into the database
                                    curs.execute(
                                        "INSERT INTO wos_titles(id,source_id,title,type,source_filename)VALUES(%s,%s,%s,%s,%s)ON CONFLICT (source_id, type) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, title = excluded.title, type = excluded.type,source_filename = excluded.source_filename;",
                                        (str(r_title.id), str(
                                            r_title.source_id),
                                         str(r_title.title), str(r_title.type),
                                         str(new_pub.source_filename)))
                                    '''writer_title.writerow((r_title['id'], \
                                                           r_title['source_id'], r_title['title'], \
                                                           r_title['type'], r_publication['source_filename']))'''

                # parse authors for each publication
                r_author = auth.author()
                r_author.source_id = new_pub.source_id

                summary = REC.find('.//' + url + 'summary')
                names = summary.find(url + 'names')
                for name in names.findall(url + "name[@role='author']"):
                    # for name in REC.findall('.//'+url+"name[@role='author']"):
                    # r_author.full_name = ''
                    full_name = name.find(url + 'full_name')
                    if full_name is not None:
                        if full_name.text is not None:
                            r_author.full_name = full_name.text.encode('utf-8')
                    # r_author['wos_standard'] = ''
                    wos_standard = name.find(url + 'wos_standard')
                    if wos_standard is not None:
                        if wos_standard.text is not None:
                            r_author.wos_standard = wos_standard.text.encode(
                                'utf-8')
                    r_author.first_name = ''
                    first_name = name.find(url + 'first_name')
                    if first_name is not None:
                        if first_name.text is not None:
                            r_author.first_name = first_name.text.encode(
                                'utf-8')
                    # r_author.last_name = ''
                    last_name = name.find(url + 'last_name')
                    if last_name is not None:
                        if last_name.text is not None:
                            r_author.last_name = last_name.text.encode('utf-8')
                    # r_author['email_addr'] = ''
                    email_addr = name.find(url + 'email_addr')
                    if email_addr is not None:
                        if email_addr.text is not None:
                            r_author.email_addr = email_addr.text.encode(
                                'utf-8')

                    r_author.seq_no = name.get('seq_no')
                    r_author.dais_id = name.get('dais_id')
                    r_author.r_id = name.get('r_id')
                    addr_seqs = name.get('addr_no')
                    # r_author.address = ''
                    r_author.address_id = ''
                    r_author.addr_seq = ''
                    if addr_seqs is not None:
                        addr_no_str = addr_seqs.split(' ')
                        for addr_seq in addr_no_str:
                            if addr_seq is not None:
                                addr_index = int(addr_seq)
                                if addr_index in addr_no_list:
                                    r_author.address = r_addr.addr_name[
                                        addr_index]
                                    r_author.address_id = r_addr.id[addr_index]
                                    r_author.addr_seq = addr_seq
                                    counters.r_author_seq += 1
                                    r_author.id = counters.r_author_seq
                                    # inserting records into author table of data base
                                    curs.execute(
                                        "INSERT INTO wos_authors(id,source_id,full_name,last_name,first_name,seq_no,address_seq,address,email_address,address_id,dais_id,r_id,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id, seq_no, address_id) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, full_name = excluded.full_name,last_name = excluded.last_name, first_name = excluded.first_name, seq_no = excluded.seq_no,address_seq = excluded.address_seq, address = excluded.address, email_address = excluded.email_address,address_id = excluded.address_id, dais_id = excluded.dais_id, r_id = excluded.r_id,source_filename = excluded.source_filename;",
                                        (str(r_author.id),
                                         str(r_author.source_id),
                                         str(r_author.full_name),
                                         str(r_author.last_name),
                                         str(r_author.first_name),
                                         str(r_author.seq_no),
                                         str(r_author.addr_seq),
                                         str(r_author.address),
                                         str(r_author.email_addr),
                                         str(r_author.address_id),
                                         str(r_author.dais_id),
                                         str(r_author.r_id),
                                         str(new_pub.source_filename)))
                                    '''writer_author.writerow((r_author['id'], \
                                                            r_author['source_id'], r_author['full_name'], \
                                                            r_author['last_name'], r_author['first_name'], \
                                                            r_author['seq_no'], r_author['addr_seq'], \
                                                            r_author['address'], r_author['email_addr'], \
                                                            r_author['address_id'], r_author['dais_id'], \
                                                            r_author['r_id'], r_publication['source_filename']))'''
                    else:
                        counters.r_author_seq += 1
                        r_author.id = counters.r_author_seq
                        r_author.address_id = 0
                        r_author.addr_seq = 0
                        # inserting records into author tables of database
                        curs.execute(
                            "INSERT INTO wos_authors(id,source_id,full_name,last_name,first_name,seq_no,email_address,dais_id,r_id,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT (source_id, seq_no, address_id) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, full_name = excluded.full_name,last_name = excluded.last_name, first_name = excluded.first_name, seq_no = excluded.seq_no,address_seq = excluded.address_seq, address = excluded.address, email_address = excluded.email_address,address_id = excluded.address_id, dais_id = excluded.dais_id, r_id = excluded.r_id,source_filename = excluded.source_filename;",
                            (str(r_author.id), str(r_author.source_id),
                             str(r_author.full_name), str(r_author.last_name),
                             str(r_author.first_name), str(r_author.seq_no),
                             str(r_author.email_addr), str(r_author.dais_id),
                             str(r_author.r_id), str(new_pub.source_filename)))
                        '''writer_author.writerow((r_author['id'], r_author['source_id'], \
                                                r_author['full_name'], r_author['last_name'], \
                                                r_author['first_name'], r_author['seq_no'], \
                                                r_author['addr_seq'], r_author['address'], \
                                                r_author['email_addr'], r_author['address_id'], \
                                                r_author['dais_id'], r_author['r_id'], \
                                                r_publication['source_filename']))'''

                # parse reference data for each publication
                REFERENCES = REC.find('.//' + url + 'references')
                for ref in REFERENCES.findall('.//' + url + 'reference'):
                    # print "inside reference"
                    r_reference = reference.reference()
                    r_reference.source_id = new_pub.source_id
                    r_reference.cited_source_uid = None
                    cited_source_id = ref.find('.//' + url + 'uid')
                    if cited_source_id is not None:
                        if cited_source_id.text is not None:
                            r_reference.cited_source_uid = cited_source_id.text. \
                                encode('utf-8')
                    # r_reference['cited_title'] = ''
                    cited_title = ref.find('.//' + url + 'citedTitle')
                    if cited_title is not None:
                        if cited_title.text is not None:
                            r_reference.cited_title = cited_title.text.encode(
                                'utf-8')
                    r_reference.cited_work = ''
                    cited_work = ref.find('.//' + url + 'citedWork')
                    if cited_work is not None:
                        if cited_work.text is not None:
                            r_reference.cited_work = cited_work.text.encode(
                                'utf-8')
                    # r_reference['cited_author'] = ''
                    cited_author = ref.find('.//' + url + 'citedAuthor')
                    if cited_author is not None:
                        if cited_author.text is not None:
                            r_reference.cited_author = cited_author.text.encode(
                                'utf-8')[:299]
                    # r_reference['cited_year'] = ''
                    cited_year = ref.find('.//' + url + 'year')
                    if cited_year is not None:
                        if cited_year.text is not None:
                            r_reference.cited_year = cited_year.text.encode(
                                'utf-8')
                    # r_reference.cited_page = ''
                    cited_page = ref.find('.//' + url + 'page')
                    if cited_page is not None:
                        if cited_page.text is not None:
                            r_reference.cited_page = cited_page.text.encode(
                                'utf-8')

                    r_reference.created_date = new_pub.created_date
                    r_reference.last_modified_date = new_pub.last_modified_date
                    if r_reference.cited_source_uid is not None:
                        counters.r_reference_seq = counters.r_reference_seq + 1
                        r_reference.id = counters.r_reference_seq
                        # inserting references into database
                        curs.execute(
                            "INSERT INTO wos_references(wos_reference_id,source_id,cited_source_uid,cited_title,cited_work,cited_author,cited_year,cited_page,created_date,last_modified_date,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT ON CONSTRAINT wos_references_pk DO UPDATE SET source_id = excluded.source_id, cited_source_uid = excluded.cited_source_uid,cited_title = excluded.cited_title, cited_work = excluded.cited_work, cited_author = excluded.cited_author,cited_year = excluded.cited_year, cited_page = excluded.cited_page, created_date = excluded.created_date,last_modified_date = excluded.last_modified_date, source_filename = excluded.source_filename;",
                            (str(r_reference.id), str(r_reference.source_id),
                             str(r_reference.cited_source_uid),
                             str(r_reference.cited_title),
                             str(r_reference.cited_work),
                             str(r_reference.cited_author),
                             str(r_reference.cited_year),
                             str(r_reference.cited_page),
                             str(r_reference.created_date),
                             str(r_reference.last_modified_date),
                             str(new_pub.source_filename)))
                        '''writer_ref.writerow((r_reference['id'], r_reference['source_id'], \
                                             r_reference['cited_source_id'], r_reference['cited_title'], \
                                             r_reference['cited_work'], r_reference['cited_author'], \
                                             r_reference['cited_year'], r_reference['cited_page'], \
                                             r_reference['created_date'], r_reference['last_modified_date'], \
                                             r_publication['source_filename']))'''
            '''print "Processed", r_publication_seq, "records from", input_csv_dir + input_filename

                                                                                :-4] + "_publication.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_references from '" + xml_csv_dir + input_filename[
                                                                              :-4] + "_reference.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_grants from '" + xml_csv_dir + input_filename[
                                                                          :-4] + "_grant.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_addresses from '" + xml_csv_dir + input_filename[
                                                                             :-4] + "_address.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_authors from '" + xml_csv_dir + input_filename[
                                                                           :-4] + "_author.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_document_identifiers from '" + xml_csv_dir + input_filename[
                                                                                        :-4] + "_dois.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_abstracts from '" + xml_csv_dir + input_filename[
                                                                             :-4] + "_abstract.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_keywords from '" + xml_csv_dir + input_filename[
                                                                            :-4] + "_keyword.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_titles from '" + xml_csv_dir + input_filename[
                                                                          :-4] + "_title.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))

            # Close all opened files
            csvfile_publication.close()
            csvfile_reference.close()
            csvfile_abstract.close()
            csvfile_address.close()
            csvfile_author.close()
            csvfile_dois.close()
            csvfile_grant.close()
            csvfile_keyword.close()
            csvfile_title.close()
            csvfile_load.close()

                #print(rec.find(self.url + 'UID').text)'''

            # print('Database connection closed.')
        except ET.ParseError as error:
            print error
        return (counters)
Esempio n. 17
0
    def parse(self, xml_string, input_file_name, curs):
        url = '<xml header URL>'
        root = ET.fromstring(xml_string)
        for REC in root:
            # parse publications and create a publication object containing all the attributes of publication
            new_pub = pub.publication()


            new_pub.source_id = REC.find(url + 'UID').text

            pub_info = REC.find('.//' + url + 'pub_info')
            new_pub.source_type = pub_info.get('pubtype')

            source_title = REC.find('.//' + url + "title[@type='source']")

            if source_title is not None:
                if source_title.text is not None:
                    new_pub.source_title = source_title.text.encode('utf-8')
            # extracting values from properties of pub_info tag in XMl
            new_pub.has_abstract = pub_info.get('has_abstract')
            new_pub.publication_year = pub_info.get('pubyear')
            new_pub.issue = pub_info.get('issue')
            new_pub.volume = pub_info.get('vol')
            new_pub.pubmonth = pub_info.get('pubmonth')
            new_pub.publication_date = pub_info.get('sortdate')
            new_pub.coverdate = pub_info.get('coverdate')

            page_info = pub_info.find(url + 'page')
            if page_info is not None:
                new_pub.begin_page = page_info.get('begin')
                new_pub.end_page = page_info.get('end')

            document_title = REC.find('.//' + url + "title[@type='item']")
            if document_title is not None:
                if document_title.text is not None:
                    new_pub.document_title = document_title.text. \
                        encode('utf-8')

            document_type = REC.find('.//' + url + 'doctype')
            if document_type is not None:
                if document_type.text is not None:
                    new_pub.document_type = document_type.text

            publisher_name = REC.find('.//' + url + "name[@role='publisher']")
            if publisher_name is not None:
                pub_name = publisher_name.find('.//' + url + 'full_name')
                if pub_name is not None:
                    if pub_name.text is not None:
                        new_pub.publisher_name = pub_name.text. \
                            encode('utf-8')

            pub_address_no = REC.find('.//' + url + "address_spec[@addr_no='1']")
            if pub_address_no is not None:
                publisher_address = pub_address_no.find('.//' + url + 'full_address')
                if publisher_address is not None:
                    if publisher_address.text is not None:
                        new_pub.publisher_address = publisher_address.text. \
                            encode('utf-8')

            languages = REC.find('.//' + url + 'languages')
            if languages is not None:
                language = languages.find('.//' + url + 'language')
                if language is not None:
                    if language.text is not None:
                        new_pub.language = language.text.encode('utf-8')

            new_pub.edition = REC.find('.//' + url + 'edition').get('value')
            new_pub.source_filename = input_file_name
            new_pub.created_date = datetime.date.today()
            new_pub.last_modified_date = datetime.date.today()
            ## query to insert a publication record into the publications table in the database
            ## The query may be moved into a separate file in the future, from where it could be read as a string, amended with values, and executed to make the code more readable
            # TODO Query below is hard to read. I'd try a multi-line string with the proper SQL formatting.
            curs.execute(
                '<query to upsert data in database>')

            # parse grants in funding acknowledgements for each publication
            # New method of creating an object to store everything in the form of proper objects which could be developed into classes having their own properties in future
            r_grant = grant.grant()
            r_grant.source_id = new_pub.source_id

            # r_grant.funding_ack = ''
            FUNDING_ACK = REC.find('.//' + url + 'fund_text')

            if FUNDING_ACK is not None:  # if funding acknowledgement exists, then extract the r_grant(s) data
                funding_ack_p = FUNDING_ACK.find('.//' + url + 'p')
                if funding_ack_p is not None:
                    if funding_ack_p.text is not None:
                        r_grant.funding_ack = funding_ack_p.text.encode('utf-8')
            # looping through all the r_grant tags
            for l_grant in REC.findall('.//' + url + 'grant'):
                # r_grant.grant_agency = ''
                grant_agency = l_grant.find('.//' + url + 'grant_agency')
                if grant_agency is not None:
                    if grant_agency.text is not None:
                        r_grant.grant_agency = grant_agency.text.encode('utf-8')

                grant_ids = l_grant.find('.//' + url + 'grant_ids')
                if grant_ids is not None:
                    for grant_id in grant_ids.findall('.//' + url + 'grant_id'):
                        if grant_id is not None:
                            if grant_id.text is not None:
                                r_grant.grant_number = grant_id.text.encode('utf-8')
                        if r_grant.funding_ack is not None:
                            # insert the grant details in the grants table if there is any funding acknowledgement in the records
                            curs.execute(
                                '<query to upsert data in database>')


            # insert code to insert record in r_grant table
            # parse document object identifiers for each publication
            r_dois = dois.dois()
            r_dois.source_id = new_pub.source_id

            IDS = REC.find('.//' + url + 'identifiers')
            if IDS is not None:
                for identifier in IDS.findall('.//' + url + 'identifier'):
                    id_value = identifier.get('value')
                    if id_value is not None:
                        r_dois.doi = id_value.encode('utf-8')
                    id_type = identifier.get('type')
                    if id_type is not None:
                        r_dois.doi_type = id_type.encode('utf-8')
                    if r_dois.doi is not None:
                        # inserting records into the wos_document_identifiers table
                        curs.execute(
                            '<query to upsert data in database>')


            # parse keyword for each publication
            keywords = REC.find('.//' + url + 'keywords_plus')
            if keywords is not None:
                r_keyword = key_word.wos_keyword()
                r_keyword.source_id = new_pub.source_id
                for keyword in keywords.findall('.//' + url + 'keyword'):
                    if keyword is not None:
                        if keyword.text is not None:
                            r_keyword.keyword = keyword.text.encode('utf-8')
                            # inserting records in wos_keywords
                            curs.execute(
                                '<query to upsert data in database>')


            # parse abstract for each publication
            if new_pub.has_abstract == 'Y':
                abstracts = REC.find('.//' + url + 'abstracts')
                if abstracts is not None:
                    r_abst = abst.abstract()
                    r_abst.source_id = new_pub.source_id
                    r_abstract_text = ''
                    for abstract_text in abstracts.findall('.//' + url + 'p'):
                        if abstract_text is not None:
                            if abstract_text.text is not None:
                                if r_abstract_text != '' and abstract_text.text != '':
                                    r_abstract_text = r_abstract_text.join('\n\n')
                                r_abstract_text = r_abstract_text + abstract_text.text.encode('utf-8')
                    # adding all the abstract paragraphs into one before writing it into the database
                    r_abst.abstract_text = re.sub( r"^[\n]+", "",r_abstract_text)
                    # writing the abstract record into the database
                    curs.execute(
                        '<query to upsert data in database>')



            # parse addresses for each publication

            r_addr = add.address()
            addr_no_list = []
            addresses = REC.find('.//' + url + 'addresses')
            for addr in addresses.findall('.//' + url + 'address_spec'):

                addr_ind = addr.get('addr_no')
                if addr_ind is None:
                    addr_ind = 0
                else:
                    addr_ind = int(addr_ind)
                    # Keep all addr_no values for later cross-referencing by authors
                    addr_no_list.append(int(addr_ind))

                r_addr.source_id[addr_ind] = new_pub.source_id
                r_addr.addr_name[addr_ind] = ''
                addr_name = addr.find('.//' + url + 'full_address')
                if addr_name is not None:
                    if addr_name.text is not None:
                        r_addr.addr_name[addr_ind] = addr_name.text.encode('utf-8')
                r_addr.organization[addr_ind] = ''
                organization = addr.find('.//' + url + "organization[@pref='Y']")
                if organization is not None:
                    if organization.text is not None:
                        r_addr.organization[addr_ind] = organization.text. \
                            encode('utf-8')
                r_addr.sub_organization[addr_ind] = ''
                suborganization = addr.find('.//' + url + 'suborganization')
                if suborganization is not None:
                    if suborganization.text is not None:
                        r_addr.sub_organization[addr_ind] = suborganization.text. \
                            encode('utf-8')
                r_addr.city[addr_ind] = ''
                city = addr.find('.//' + url + 'city')
                if city is not None:
                    if city.text is not None:
                        r_addr.city[addr_ind] = city.text.encode('utf-8')
                r_addr.country[addr_ind] = ''
                country = addr.find('.//' + url + 'country')
                if country is not None:
                    if country.text is not None:
                        r_addr.country[addr_ind] = country.text.encode('utf-8')
                r_addr.zip_code[addr_ind] = ''
                addr_zip = addr.find('.//' + url + 'zip')
                if addr_zip is not None:
                    if addr_zip.text is not None:
                        r_addr.zip_code[addr_ind] = addr_zip.text.encode('utf-8')
                if r_addr.addr_name[addr_ind] is not None:
                    # Inserting address records into the database, then retrieving and storing the address_id for later use when inserting authors
                    curs.execute(
                        '<query to upsert data in database>')
                    r_addr.id[addr_ind] = curs.fetchone()[0]


            # parse titles for each publication
            r_title = ti.title()
            r_title.source_id = new_pub.source_id

            summary = REC.find('.//' + url + 'summary')
            if summary is not None:
                titles = summary.find('.//' + url + 'titles')
                if titles is not None:
                    for title in titles.findall('.//' + url + 'title'):
                        if title is not None:
                            if title.text is not None:
                                r_title.title = title.text.encode('utf-8')
                                r_title.type = title.get('type')
                                # inserting titles into the database
                                curs.execute(
                                    '<query to upsert data in database>')


            # parse authors for each publication
            r_author = auth.author()
            r_author.source_id = new_pub.source_id

            summary = REC.find('.//' + url + 'summary')
            names = summary.find(url + 'names')
            for name in names.findall(url + "name[@role='author']"):
                full_name = name.find(url + 'full_name')
                if full_name is not None:
                    if full_name.text is not None:
                        r_author.full_name = full_name.text.encode('utf-8')
                wos_standard = name.find(url + 'wos_standard')
                if wos_standard is not None:
                    if wos_standard.text is not None:
                        r_author.wos_standard = wos_standard.text.encode('utf-8')
                r_author.first_name = ''
                first_name = name.find(url + 'first_name')
                if first_name is not None:
                    if first_name.text is not None:
                        r_author.first_name = first_name.text.encode('utf-8')
                last_name = name.find(url + 'last_name')
                if last_name is not None:
                    if last_name.text is not None:
                        r_author.last_name = last_name.text.encode('utf-8')
                email_addr = name.find(url + 'email_addr')
                if email_addr is not None:
                    if email_addr.text is not None:
                        r_author.email_addr = email_addr.text.encode('utf-8')

                r_author.seq_no = name.get('seq_no')
                r_author.dais_id = name.get('dais_id')
                if (r_author.dais_id == None):
                    r_author.dais_id = ''
                r_author.r_id = name.get('r_id')
                if (r_author.r_id == None):
                    r_author.r_id = ''
                addr_seqs = name.get('addr_no')
                r_author.address_id = ''
                r_author.addr_seq = ''
                if addr_seqs is not None:
                    addr_no_str = addr_seqs.split(' ')
                    for addr_seq in addr_no_str:
                        if addr_seq is not None:
                            addr_index = int(addr_seq)
                            if addr_index in addr_no_list:
                                r_author.address = r_addr.addr_name[addr_index]
                                r_author.address_id = r_addr.id[addr_index]
                                r_author.addr_seq = addr_seq
                                curs.execute(
                                    '<query to upsert data in database>')

                else:
                    r_author.address_id = 0
                    r_author.addr_seq = 0
                    # inserting records into author tables of database
                    curs.execute(
                        '<query to upsert data in database>')


            # parse reference data for each publication
            REFERENCES = REC.find('.//' + url + 'references')
            for ref in REFERENCES.findall('.//' + url + 'reference'):
                r_reference = reference.reference()
                r_reference.source_id = new_pub.source_id
                r_reference.cited_source_uid = None
                cited_source_id = ref.find('.//' + url + 'uid')
                if cited_source_id is not None:
                    if cited_source_id.text is not None:
                        r_reference.cited_source_uid = cited_source_id.text. \
                            encode('utf-8')
                cited_title = ref.find('.//' + url + 'citedTitle')
                if cited_title is not None:
                    if cited_title.text is not None:
                        r_reference.cited_title = cited_title.text.encode('utf-8')
                r_reference.cited_work = ''
                cited_work = ref.find('.//' + url + 'citedWork')
                if cited_work is not None:
                    if cited_work.text is not None:
                        r_reference.cited_work = cited_work.text.encode('utf-8')
                cited_author = ref.find('.//' + url + 'citedAuthor')
                if cited_author is not None:
                    if cited_author.text is not None:
                        r_reference.cited_author = cited_author.text.encode('utf-8')[:299]
                cited_year = ref.find('.//' + url + 'year')
                if cited_year is not None:
                    if cited_year.text is not None:
                        r_reference.cited_year = cited_year.text.encode('utf-8')
                cited_page = ref.find('.//' + url + 'page')
                if cited_page is not None:
                    if cited_page.text is not None:
                        r_reference.cited_page = cited_page.text.encode('utf-8')

                r_reference.created_date = new_pub.created_date
                r_reference.last_modified_date = new_pub.last_modified_date
                if r_reference.cited_source_uid is not None:
                    # inserting references into database
                    curs.execute(
                        '<query to upsert data in database>')
Esempio n. 18
0
def instruct():
    """Print the menu of available actions (text is in Russian).

    Options: 0 exit, 1 create a new vals file, 2 update an existing
    vals file, 3 recode the base using vals, 4 split an xlsx file
    into vars and vals.
    """
    menu = '''
    0- Выйти из программы
    1- создать новый vals файл
    2- обновить существующий vals файл
    3- перекодировать базу используя vals
    4- Разобрать xlsx файл на vars и vals
    '''
    print(menu)

# Interactive driver: show the menu, read an action number, dispatch to
# the matching vals/recode helper until the user chooses 0.
while True:
    instruct()
    # Defect fix: int() raises ValueError on any non-numeric entry, which
    # previously crashed the whole program. Guard and re-prompt instead.
    try:
        ask = int(input('введите действие: '))
    except ValueError:
        print('invalid input, enter a number from the menu')
        continue
    if ask == 1:
        # New vals file: CSV paths go through pandas, anything else
        # is treated as an SPSS file.
        c = address(ask)
        if c[-3:] == 'csv':
            get_vals(c, read_csv, DataFrame)
        else:
            spss_to_vals(c)
    elif ask == 2:
        c, k = address(ask)
        update_vals(c, k)
    elif ask == 3:
        c, k = address(ask)
        recod_base(c, k)
    elif ask == 4:
        a = address(ask)
        disintegration(a)
    elif ask == 0:
        break
    else:
        # Previously an unknown number silently re-looped; tell the user.
        print('unknown action, try again')
    def entry(self, ex_id):
        """Interactive account menu for the customer with id ``ex_id``.

        Repeatedly prints the option list and dispatches the chosen
        number to the matching service object until the customer logs
        out (0) or closes the account (7).  Written in Python 2 syntax
        (``print`` statements, ``raw_input``).
        """
        # Loop flag: stays 1 while the menu should keep running; set to 2
        # (logout or account closure) to leave the loop.
        var1 = 1
        while var1 == 1:
            print "1 ADDRESS CHANGE"
            print "2 OPEN NEW ACCOUNT"
            print "3 MONEY DEPOSIT"
            print "4 MONEY WITHDRAWL"
            print "5 MONEY TRANSFER"
            print "6 PRINT STATEMENT"
            print "7 ACCOUNT CLOSURE"
            print "8 AVAIL LOAN"
            print "0 CUSTOMER LOGOUT"
            # NOTE(review): int() raises ValueError on non-numeric input;
            # there is no guard here, so a bad entry aborts the session.
            c = input()
            c = int(c)

            if c == 1:
                s2 = address()
                s2.add_change(ex_id)
            elif c == 3:
                s2 = money_dep()
                s2.money_deposit(ex_id)
            elif c == 4:

                s2 = money_wid()
                s2.money_withdrawl(ex_id)
            elif c == 6:
                pr = transPrint()
                pr.printi(ex_id)
            elif c == 5:
                # Money transfer: ask for the destination customer id and
                # validate it against the cust_info table before transferring.
                s2 = money_transfer()
                print("enter the id to transfer money")
                cus = raw_input()
                cus = int(cus)
                # NOTE(review): database credentials are hard-coded here;
                # consider moving them to configuration.
                conn = pymysql.connect("localhost",
                                       "root",
                                       "",
                                       "bankM5",
                                       autocommit=True)
                cur = conn.cursor()
                sq3 = "SELECT * FROM cust_info"
                cur.execute(sq3)
                r = cur.rowcount
                # Assumes customer ids run 1..rowcount — TODO confirm that
                # ids are sequential with no gaps.
                if (cus > r):
                    print("wrong id plz input again:")
                else:
                    cus = str(cus)
                    # NOTE(review): 'tranfer' — presumably matches the
                    # method name spelled this way in money_transfer; verify.
                    s2.tranfer(ex_id, cus)
            elif c == 7:
                # Account closure also ends the menu loop.
                s2 = acc_close()
                s2.account_closure(ex_id)
                var1 = 2
            elif c == 0:
                print "logged out"
                var1 = 2
            elif c == 2:

                s1 = open_new_acc()
                s1.open(ex_id)

            elif c == 8:

                s1 = open_new_loan()
                s1.open(ex_id)
            elif c > 8:
                print("wrong choice enter again")
Esempio n. 20
0
 def setUp(self):
     """Lazily construct the shared address service exactly once."""
     from address import address
     if self.adr_svc is not None:
         return
     self.adr_svc = address()
def lambda_handler(event, context):
    """AWS Lambda entry point routing API Gateway requests to the address service.

    Dispatches on resource path + HTTP method:
      POST   /address              -> create_address
      GET    /address              -> list_addresses
      PUT    /address/{addressId}  -> update_address
      GET    /address/{addressId}  -> get_address
      DELETE /address/{addressId}  -> remove_address

    Returns the service response on success, or an error payload with
    a status code on failure (404 for missing event keys, 403 for
    unrecognized routes).
    """
    from address import address
    adr_svc = address()

    status_code = 200
    adr_svc.log.info(event)
    try:
        try:
            try:
                # All branches key off the same mapping-template layout.
                # Defect fix: the original PUT/DELETE branches read
                # event['body']['context'] and event['path']['params'],
                # which do not exist in this event shape, so those routes
                # could never match (they raised KeyError -> 404).
                resource_path = event['context']['resource-path']
                http_method = event['context']['http-method']
                if resource_path == "/address" and http_method == "POST":
                    # json.dumps already returns text; the former
                    # .decode('utf-8') call raises AttributeError on Python 3.
                    res = adr_svc.create_address(
                        json.dumps({
                            'address': event['body-json'],
                            'stage': event['context']['stage']
                        })
                    )
                elif (resource_path == '/address'
                        and http_method == "GET"
                        and event['params']['path'].get('addressId') is None):
                    res = adr_svc.list_addresses({
                        'address': None,
                        'stage': event['context']['stage']
                    })
                elif (resource_path == "/address/{addressId}"
                        and http_method == "PUT"
                        and event['params']['path'].get('addressId') is not None):
                    res = adr_svc.update_address({
                        'address': event['body-json'],
                        'stage': event['context']['stage']
                    })
                elif (resource_path == '/address/{addressId}'
                        and http_method == "GET"
                        and event['params']['path'].get('addressId') is not None):
                    res = adr_svc.get_address({
                        'addressId': event['params']['path']['addressId'],
                        'stage': event['context']['stage']
                    })
                elif (resource_path == '/address/{addressId}'
                        and http_method == "DELETE"
                        and event['params']['path'].get('addressId') is not None):
                    res = adr_svc.remove_address({
                        'addressId': event['params']['path']['addressId'],
                        'stage': event['context']['stage']
                    })
                else:
                    status_code = 403
                    res = "Request not valid"
            except KeyError as e:
                # A key missing from the event maps to "not found".
                status_code = 404
                res = {
                    'Error': str(e),  # e.message is Python 2 only
                    'statusCode': status_code
                }
                adr_svc.log.error(res)
        except urllib3.exceptions.ProtocolError as e:
            res = {
                'Error': str(e),
                # NOTE(review): urllib3's ProtocolError has no .status
                # attribute; fall back to 500 rather than raising here.
                'statusCode': getattr(e, 'status', 500)
            }
            adr_svc.log.error(res)
        output = res

    except botocore.exceptions.ClientError as e:
        output = json.dumps(
            {'status': e.response['ResponseMetadata']['HTTPStatusCode'],
                'body': 'Failed to update: %s' % e.response['Error']['Code']}
            )
        adr_svc.log.error(json.dumps(output))

    return output
Esempio n. 22
0
    def parse(self, xml_string, input_file_name, curs):
        url = '{http://clarivate.com/schema/wok5.27/public/FullRecord}'
        root = ET.fromstring(xml_string)
        for REC in root:
            # parse publications and create a publication object containing all the attributes of publication
            new_pub = pub.publication()


            new_pub.source_id = REC.find(url + 'UID').text

            pub_info = REC.find('.//' + url + 'pub_info')
            new_pub.source_type = pub_info.get('pubtype')

            source_title = REC.find('.//' + url + "title[@type='source']")

            if source_title is not None:
                if source_title.text is not None:
                    new_pub.source_title = source_title.text.encode('utf-8')
            # extracting values from properties of pub_info tag in XMl
            new_pub.has_abstract = pub_info.get('has_abstract')
            new_pub.publication_year = pub_info.get('pubyear')
            new_pub.issue = pub_info.get('issue')
            new_pub.volume = pub_info.get('vol')
            new_pub.pubmonth = pub_info.get('pubmonth')
            new_pub.publication_date = pub_info.get('sortdate')
            new_pub.coverdate = pub_info.get('coverdate')

            page_info = pub_info.find(url + 'page')
            if page_info is not None:
                new_pub.begin_page = page_info.get('begin')
                new_pub.end_page = page_info.get('end')

            document_title = REC.find('.//' + url + "title[@type='item']")
            if document_title is not None:
                if document_title.text is not None:
                    new_pub.document_title = document_title.text. \
                        encode('utf-8')

            document_type = REC.find('.//' + url + 'doctype')
            if document_type is not None:
                if document_type.text is not None:
                    new_pub.document_type = document_type.text

            publisher_name = REC.find('.//' + url + "name[@role='publisher']")
            if publisher_name is not None:
                pub_name = publisher_name.find('.//' + url + 'full_name')
                if pub_name is not None:
                    if pub_name.text is not None:
                        new_pub.publisher_name = pub_name.text. \
                            encode('utf-8')

            pub_address_no = REC.find('.//' + url + "address_spec[@addr_no='1']")
            if pub_address_no is not None:
                publisher_address = pub_address_no.find('.//' + url + 'full_address')
                if publisher_address is not None:
                    if publisher_address.text is not None:
                        new_pub.publisher_address = publisher_address.text. \
                            encode('utf-8')

            languages = REC.find('.//' + url + 'languages')
            if languages is not None:
                language = languages.find('.//' + url + 'language')
                if language is not None:
                    if language.text is not None:
                        new_pub.language = language.text.encode('utf-8')

            new_pub.edition = REC.find('.//' + url + 'edition').get('value')
            new_pub.source_filename = input_file_name
            new_pub.created_date = datetime.date.today()
            new_pub.last_modified_date = datetime.date.today()
            ## query to insert a publication record into the publications table in the database
            ## The query may be written into a saperate file in future from where it is read in the form of a string ammended values and executed to make code look better
            # TODO Query below is hard to read. I'd try a multi-line string with the proper SQL formatting.
            curs.execute(
                "INSERT INTO wos_publications(begin_page, created_date, document_title, document_type,edition, end_page,has_abstract,issue,"\
                    "language,last_modified_date,publication_date,publication_year,publisher_address,publisher_name,source_filename,source_id,"\
                    "source_title,source_type,volume)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id)"\
                    "DO UPDATE SET begin_page = excluded.begin_page, created_date = excluded.created_date,document_title ="\
                    " excluded.document_title, document_type = excluded.document_type, edition = excluded.edition,end_page ="\
                    "excluded.end_page, has_abstract = excluded.has_abstract, issue = excluded.issue,language = excluded.language,"\
                    "last_modified_date = excluded.last_modified_date,publication_date = excluded.publication_date, publication_year"\
                    "= excluded.publication_year,publisher_address = excluded.publisher_address, publisher_name = excluded.publisher_name,"\
                    "source_filename = excluded.source_filename, source_id = excluded.source_id, source_title = excluded.source_title,"\
                    "source_type = excluded.source_type, volume = excluded.volume, last_updated_time=current_timestamp;",
                (str(new_pub.begin_page), new_pub.created_date, str(new_pub.document_title),
                 str(new_pub.document_type), str(new_pub.edition), str(new_pub.end_page), str(new_pub.has_abstract),
                 str(new_pub.issue), str(new_pub.language), new_pub.last_modified_date,
                 new_pub.publication_date, str(new_pub.publication_year), str(new_pub.publisher_address),
                 str(new_pub.publisher_name), str(new_pub.source_filename), str(new_pub.source_id),
                 str(new_pub.source_title), new_pub.source_type, str(new_pub.volume)))

            # parse grants in funding acknowledgements for each publication
            # New method of creating an object to store everything in the form of proper objects which could be developed into classes having their own properties in future
            r_grant = grant.grant()
            r_grant.source_id = new_pub.source_id

            # r_grant.funding_ack = ''
            FUNDING_ACK = REC.find('.//' + url + 'fund_text')

            if FUNDING_ACK is not None:  # if funding acknowledgement exists, then extract the r_grant(s) data
                funding_ack_p = FUNDING_ACK.find('.//' + url + 'p')
                if funding_ack_p is not None:
                    if funding_ack_p.text is not None:
                        r_grant.funding_ack = funding_ack_p.text.encode('utf-8')
            # looping through all the r_grant tags
            for l_grant in REC.findall('.//' + url + 'grant'):
                # r_grant.grant_agency = ''
                grant_agency = l_grant.find('.//' + url + 'grant_agency')
                if grant_agency is not None:
                    if grant_agency.text is not None:
                        r_grant.grant_agency = grant_agency.text.encode('utf-8')

                grant_ids = l_grant.find('.//' + url + 'grant_ids')
                if grant_ids is not None:
                    for grant_id in grant_ids.findall('.//' + url + 'grant_id'):
                        if grant_id is not None:
                            if grant_id.text is not None:
                                r_grant.grant_number = grant_id.text.encode('utf-8')
                        if r_grant.funding_ack is not None:
                            # insert the grant details in the grants table if there is any funding acknowledgement in the records
                            curs.execute(
                                "INSERT INTO wos_grants(source_id,grant_number,grant_organization,funding_ack,source_filename)VALUES"\
                                        "(%s,%s,%s,%s,%s) ON CONFLICT (source_id, grant_number, grant_organization) DO UPDATE SET source_id"\
                                        "= excluded.source_id, grant_number = excluded.grant_number,grant_organization ="\
                                        "excluded.grant_organization, funding_ack = excluded.funding_ack,source_filename ="\
                                        "excluded.source_filename, last_updated_time=current_timestamp;",
                                (str(r_grant.source_id), str(r_grant.grant_number),
                                 str(r_grant.grant_agency), str(r_grant.funding_ack), str(new_pub.source_filename)))


            # insert code to insert record in r_grant table
            # parse document object identifiers for each publication
            r_dois = dois.dois()
            r_dois.source_id = new_pub.source_id

            IDS = REC.find('.//' + url + 'identifiers')
            if IDS is not None:
                for identifier in IDS.findall('.//' + url + 'identifier'):
                    id_value = identifier.get('value')
                    if id_value is not None:
                        r_dois.doi = id_value.encode('utf-8')
                    id_type = identifier.get('type')
                    if id_type is not None:
                        r_dois.doi_type = id_type.encode('utf-8')
                    if r_dois.doi is not None:
                        # insering records into wos_document_identifier table
                        curs.execute(
                            "INSERT INTO wos_document_identifiers(source_id,document_id,document_id_type,source_filename)VALUES(%s,%s,%s,%s)"\
                                "ON CONFLICT (source_id, document_id_type, document_id) DO UPDATE SET source_id = excluded.source_id,"\
                                "document_id = excluded.document_id,document_id_type = excluded.document_id_type, source_filename ="\
                                "excluded.source_filename, last_updated_time=current_timestamp;",
                            (str(r_dois.source_id), str(r_dois.doi), str(r_dois.doi_type),
                             str(new_pub.source_filename)))


            # parse keyword for each publication
            keywords = REC.find('.//' + url + 'keywords_plus')
            if keywords is not None:
                r_keyword = key_word.wos_keyword()
                r_keyword.source_id = new_pub.source_id
                for keyword in keywords.findall('.//' + url + 'keyword'):
                    if keyword is not None:
                        if keyword.text is not None:
                            r_keyword.keyword = keyword.text.encode('utf-8')
                            # inserting records in wos_keywords
                            curs.execute(
                                "INSERT INTO wos_keywords(source_id,keyword,source_filename)VALUES(%s,%s,%s)ON CONFLICT"\
                                    "(source_id, keyword) DO UPDATE SET source_id = excluded.source_id, keyword = excluded.keyword,"\
                                    "source_filename = excluded.source_filename, last_updated_time=current_timestamp;",
                                (str(r_keyword.source_id), str(r_keyword.keyword),
                                 str(new_pub.source_filename)))


            # parse abstract for each publication
            if new_pub.has_abstract == 'Y':
                abstracts = REC.find('.//' + url + 'abstracts')
                if abstracts is not None:
                    r_abst = abst.abstract()
                    r_abst.source_id = new_pub.source_id
                    r_abstract_text = ''
                    for abstract_text in abstracts.findall('.//' + url + 'p'):
                        if abstract_text is not None:
                            if abstract_text.text is not None:
                                if r_abstract_text != '' and abstract_text.text != '':
                                    r_abstract_text = r_abstract_text.join('\n\n')
                                r_abstract_text = r_abstract_text + abstract_text.text.encode('utf-8')
                    # adding all the abstract paragraphs into one before writing it into the database
                    r_abst.abstract_text = re.sub( r"^[\n]+", "",r_abstract_text)
                    # writing the abstracts record into the data base
                    curs.execute(
                        "INSERT INTO wos_abstracts(source_id,abstract_text,source_filename)VALUES(%s,%s,%s) ON CONFLICT(source_id) DO UPDATE"\
                            " SET source_id = excluded.source_id,abstract_text = excluded.abstract_text,source_filename = excluded.source_filename, last_updated_time=current_timestamp;",
                        (str(r_abst.source_id), str(r_abst.abstract_text), str(new_pub.source_filename)))



            # parse addresses for each publication

            r_addr = add.address()
            addr_no_list = []
            addresses = REC.find('.//' + url + 'addresses')
            for addr in addresses.findall('.//' + url + 'address_spec'):

                addr_ind = addr.get('addr_no')
                if addr_ind is None:
                    addr_ind = 0
                else:
                    addr_ind = int(addr_ind)
                    # Kepp all addr_no for the following reference by authors
                    addr_no_list.append(int(addr_ind))

                r_addr.source_id[addr_ind] = new_pub.source_id
                r_addr.addr_name[addr_ind] = ''
                addr_name = addr.find('.//' + url + 'full_address')
                if addr_name is not None:
                    if addr_name.text is not None:
                        r_addr.addr_name[addr_ind] = addr_name.text.encode('utf-8')
                r_addr.organization[addr_ind] = ''
                organization = addr.find('.//' + url + "organization[@pref='Y']")
                if organization is not None:
                    if organization.text is not None:
                        r_addr.organization[addr_ind] = organization.text. \
                            encode('utf-8')
                r_addr.sub_organization[addr_ind] = ''
                suborganization = addr.find('.//' + url + 'suborganization')
                if suborganization is not None:
                    if suborganization.text is not None:
                        r_addr.sub_organization[addr_ind] = suborganization.text. \
                            encode('utf-8')
                r_addr.city[addr_ind] = ''
                city = addr.find('.//' + url + 'city')
                if city is not None:
                    if city.text is not None:
                        r_addr.city[addr_ind] = city.text.encode('utf-8')
                r_addr.country[addr_ind] = ''
                country = addr.find('.//' + url + 'country')
                if country is not None:
                    if country.text is not None:
                        r_addr.country[addr_ind] = country.text.encode('utf-8')
                r_addr.zip_code[addr_ind] = ''
                addr_zip = addr.find('.//' + url + 'zip')
                if addr_zip is not None:
                    if addr_zip.text is not None:
                        r_addr.zip_code[addr_ind] = addr_zip.text.encode('utf-8')
                if r_addr.addr_name[addr_ind] is not None:
                    # Insering address records into database and retrieving and storing the address_id for future use in authors insertion
                    curs.execute(
                        "INSERT INTO wos_addresses(source_id,address_name,organization,sub_organization,city,country,zip_code,source_filename)"\
                            "VALUES(%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT (source_id, address_name) DO UPDATE SET source_id = excluded.source_id,"\
                            "address_name = excluded.address_name,organization = excluded.organization, sub_organization = excluded.sub_organization,"\
                            "city = excluded.city,country = excluded.country, zip_code = excluded.zip_code, source_filename = excluded.source_filename RETURNING id, last_updated_time=current_timestamp;",
                        (str(r_addr.source_id[addr_ind]), str(r_addr.addr_name[addr_ind]),
                         str(r_addr.organization[addr_ind]), str(r_addr.sub_organization[addr_ind]),
                         str(r_addr.city[addr_ind]), str(r_addr.country[addr_ind]), str(r_addr.zip_code[addr_ind]),
                         str(new_pub.source_filename)))
                    r_addr.id[addr_ind] = curs.fetchone()[0]


            # parse titles for each publication
            r_title = ti.title()
            r_title.source_id = new_pub.source_id

            summary = REC.find('.//' + url + 'summary')
            if summary is not None:
                titles = summary.find('.//' + url + 'titles')
                if titles is not None:
                    for title in titles.findall('.//' + url + 'title'):
                        if title is not None:
                            if title.text is not None:
                                r_title.title = title.text.encode('utf-8')
                                r_title.type = title.get('type')
                                # inserting titles into the database
                                curs.execute(
                                    "INSERT INTO wos_titles(source_id,title,type,source_filename)VALUES(%s,%s,%s,%s)ON CONFLICT (source_id, type)"\
                                        "DO UPDATE SET source_id = excluded.source_id, title = excluded.title, type = excluded.type,source_filename ="\
                                        "excluded.source_filename, last_updated_time=current_timestamp;",
                                    (str(r_title.source_id), str(r_title.title), str(r_title.type),
                                     str(new_pub.source_filename)))


            # parse subjects for each publication
            r_subjects = sb.subjects()
            r_subjects.source_id = new_pub.source_id

            subjects = REC.find('.//' + url + 'subjects')
            if subjects is not None:
                for subject in subjects.findall('.//' + url + 'subject'):
                    if subject is not None:
                        if subject.text is not None:
                            r_subjects.subject = subject.text.encode('utf-8')
                            r_subjects.subject_classification_type = subject.get('ascatype')
                            #inserting subjects into the database
                            curs.execute(
                                    "INSERT INTO wos_publication_subjects(source_id,subject_classification_type,subject,source_filename)VALUES(%s,%s,%s,%s)ON CONFLICT (source_id,subject_classification_type,subject)"\
                                        "DO UPDATE SET source_id = excluded.source_id, subject_classification_type = excluded.subject_classification_type, subject = excluded.subject,source_filename ="\
                                        "excluded.source_filename, last_updated_time=current_timestamp;",
                                    (str(r_subjects.source_id), str(r_subjects.subject_classification_type), str(r_subjects.subject),
                                     str(new_pub.source_filename)))


            # ---- Author parsing ----
            # Walk every <name role="author"> under summary/names and upsert one
            # wos_authors row per (author, linked address). A single r_author
            # object is reused for the whole loop.
            # NOTE(review): because r_author is reused, fields that are only
            # assigned when the XML element is present (full_name, wos_standard,
            # last_name, email_addr) can leak from the previous author when the
            # current one omits them — only first_name is explicitly reset.
            # Verify whether that carry-over is intended.
            r_author = auth.author()
            r_author.source_id = new_pub.source_id

            summary = REC.find('.//' + url + 'summary')
            names = summary.find(url + 'names')
            for name in names.findall(url + "name[@role='author']"):
                # Each optional child element is read defensively: both the
                # element and its text must exist before the field is set.
                full_name = name.find(url + 'full_name')
                if full_name is not None:
                    if full_name.text is not None:
                        r_author.full_name = full_name.text.encode('utf-8')
                wos_standard = name.find(url + 'wos_standard')
                if wos_standard is not None:
                    if wos_standard.text is not None:
                        r_author.wos_standard = wos_standard.text.encode('utf-8')
                # first_name is the only field reset every iteration.
                r_author.first_name = ''
                first_name = name.find(url + 'first_name')
                if first_name is not None:
                    if first_name.text is not None:
                        r_author.first_name = first_name.text.encode('utf-8')
                last_name = name.find(url + 'last_name')
                if last_name is not None:
                    if last_name.text is not None:
                        r_author.last_name = last_name.text.encode('utf-8')
                email_addr = name.find(url + 'email_addr')
                if email_addr is not None:
                    if email_addr.text is not None:
                        r_author.email_addr = email_addr.text.encode('utf-8')

                # Identifier attributes on the <name> element; missing ones are
                # normalized to empty strings before the DB insert.
                # NOTE(review): `== None` works but `is None` is the idiomatic
                # (and identity-correct) comparison.
                r_author.seq_no = name.get('seq_no')
                r_author.dais_id = name.get('dais_id')
                if (r_author.dais_id == None):
                    r_author.dais_id = ''
                r_author.r_id = name.get('r_id')
                if (r_author.r_id == None):
                    r_author.r_id = ''
                # addr_no is a space-separated list of address sequence numbers
                # linking this author to addresses parsed earlier (r_addr /
                # addr_no_list are populated before this fragment).
                addr_seqs = name.get('addr_no')
                r_author.address_id = ''
                r_author.addr_seq = ''
                if addr_seqs is not None:
                    addr_no_str = addr_seqs.split(' ')
                    for addr_seq in addr_no_str:
                        if addr_seq is not None:
                            addr_index = int(addr_seq)
                            if addr_index in addr_no_list:
                                # One row per linked address; conflict key is
                                # (source_id, seq_no, address_id).
                                r_author.address = r_addr.addr_name[addr_index]
                                r_author.address_id = r_addr.id[addr_index]
                                r_author.addr_seq = addr_seq
                                curs.execute(
                                    "INSERT INTO wos_authors(source_id,full_name,last_name,first_name,seq_no,address_seq,address,email_address,address_id,"\
                                        "dais_id,r_id,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id, seq_no, address_id)"\
                                        "DO UPDATE SET source_id = excluded.source_id, full_name = excluded.full_name,last_name = excluded.last_name, first_name"\
                                        "= excluded.first_name, seq_no = excluded.seq_no,address_seq = excluded.address_seq, address = excluded.address,"\
                                        "email_address = excluded.email_address,address_id = excluded.address_id, dais_id = excluded.dais_id, r_id ="\
                                        "excluded.r_id,source_filename = excluded.source_filename, last_updated_time=current_timestamp;",
                                    (str(r_author.source_id), str(r_author.full_name),
                                     str(r_author.last_name), str(r_author.first_name), str(r_author.seq_no),
                                     str(r_author.addr_seq), str(r_author.address), str(r_author.email_addr),
                                     str(r_author.address_id), str(r_author.dais_id), str(r_author.r_id),
                                     str(new_pub.source_filename)))

                else:
                    # No addr_no attribute: insert a single row without address
                    # columns.
                    # NOTE(review): address_id/addr_seq are set to 0 here but
                    # are NOT in the INSERT column list below — the assignments
                    # appear to be dead; confirm whether the DB columns default
                    # appropriately for the (source_id, seq_no, address_id)
                    # conflict target to work.
                    r_author.address_id = 0
                    r_author.addr_seq = 0
                    # inserting records into author tables of database
                    curs.execute(
                        "INSERT INTO wos_authors(source_id,full_name,last_name,first_name,seq_no,email_address,dais_id,r_id,source_filename)"\
                            "VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT (source_id, seq_no, address_id) DO UPDATE SET source_id ="\
                            "excluded.source_id, full_name = excluded.full_name,last_name = excluded.last_name, first_name ="\
                            "excluded.first_name, seq_no = excluded.seq_no,address_seq = excluded.address_seq, address = excluded.address,"\
                            "email_address = excluded.email_address,address_id = excluded.address_id, dais_id = excluded.dais_id, r_id ="\
                            "excluded.r_id,source_filename = excluded.source_filename, last_updated_time=current_timestamp;",
                        (
                            str(r_author.source_id), str(r_author.full_name), str(r_author.last_name),
                            str(r_author.first_name), str(r_author.seq_no), str(r_author.email_addr),
                            str(r_author.dais_id), str(r_author.r_id), str(new_pub.source_filename)))


            # ---- Reference parsing ----
            # Walk every <reference> under <references> and upsert one
            # wos_references row. A fresh reference object is created per
            # element (unlike the author loop above), so no field carry-over.
            REFERENCES = REC.find('.//' + url + 'references')
            for ref in REFERENCES.findall('.//' + url + 'reference'):
                try:
                    r_reference = reference.reference()
                    r_reference.source_id = new_pub.source_id
                    # cited_source_uid doubles as the "is this reference
                    # insertable" flag — rows without a <uid> are skipped below.
                    r_reference.cited_source_uid = None
                    cited_source_id = ref.find('.//' + url + 'uid')
                    if cited_source_id is not None:
                        if cited_source_id.text is not None:
                            r_reference.cited_source_uid = cited_source_id.text. \
                                encode('utf-8')
                    cited_title = ref.find('.//' + url + 'citedTitle')
                    if cited_title is not None:
                        if cited_title.text is not None:
                            r_reference.cited_title = cited_title.text.encode('utf-8')
                    r_reference.cited_work = ''
                    cited_work = ref.find('.//' + url + 'citedWork')
                    if cited_work is not None:
                        if cited_work.text is not None:
                            r_reference.cited_work = cited_work.text.encode('utf-8')
                    cited_author = ref.find('.//' + url + 'citedAuthor')
                    if cited_author is not None:
                        if cited_author.text is not None:
                            # Truncated to 299 bytes — presumably the DB column
                            # is varchar(300); confirm against the schema.
                            r_reference.cited_author = cited_author.text.encode('utf-8')[:299]
                    cited_year = ref.find('.//' + url + 'year')
                    if cited_year is not None:
                        if cited_year.text is not None:
                            r_reference.cited_year = cited_year.text.encode('utf-8')
                    cited_page = ref.find('.//' + url + 'page')
                    if cited_page is not None:
                        if cited_page.text is not None:
                            r_reference.cited_page = cited_page.text.encode('utf-8')

                    # Timestamps are inherited from the parent publication row.
                    r_reference.created_date = new_pub.created_date
                    r_reference.last_modified_date = new_pub.last_modified_date
                    if r_reference.cited_source_uid is not None:
                        # Upsert keyed on the wos_references_pk constraint;
                        # re-parsing refreshes the row and last_updated_time.
                        curs.execute(
                            "INSERT INTO wos_references(source_id,cited_source_uid,cited_title,cited_work,cited_author,cited_year,cited_page,"\
                                "created_date,last_modified_date,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT ON CONSTRAINT"\
                                " wos_references_pk DO UPDATE SET source_id = excluded.source_id, cited_source_uid = excluded.cited_source_uid,"\
                                "cited_title = excluded.cited_title, cited_work = excluded.cited_work, cited_author = excluded.cited_author,"\
                                "cited_year = excluded.cited_year, cited_page = excluded.cited_page, created_date = excluded.created_date,"\
                                "last_modified_date = excluded.last_modified_date, source_filename = excluded.source_filename, last_updated_time=current_timestamp;",
                            (str(r_reference.source_id), str(r_reference.cited_source_uid),
                             str(r_reference.cited_title), str(r_reference.cited_work), str(r_reference.cited_author),
                             str(r_reference.cited_year), str(r_reference.cited_page), str(r_reference.created_date),
                             str(r_reference.last_modified_date), str(new_pub.source_filename)))
                except Exception:
                    # Log the offending record for diagnosis, then re-raise so
                    # the failure is not swallowed. (Python 2 print statement —
                    # this fragment is Python 2 code.)
                    print "ERROR occurred for the following reference record:\n", r_reference
                    raise
Esempio n. 23
0
#! /usr/bin/python3

# Demo script: prints the sys module's attribute listing, then exercises two
# project-local modules (name_module and address).
from name_module import print_name
import address
import sys

# dir(sys) enumerates every attribute name exposed by the sys module.
print(dir(sys))
print_name('Himanshu')
# address.address(...) is project-local; presumably a class or function taking
# a single address string — signature not visible from this file. TODO confirm.
address.address("210 Hawkes bay, California")