def __init__(self, value=0x0000):
    valid_value(value, MIN_VALUE, MAX_VALUE)
    self.map = []
    for cont in range(MAX_ADDR // 2):
        self.map.append(address.address(2 * cont, value // 0x100))
        self.map.append(
            address.address(2 * cont + 1, value - (value // 0x100) * 0x100))
    self.map.append(address.address(MAX_ADDR - 1, 0xff))
    self.map.append(address.address(MAX_ADDR, 0xfc))
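# A minimal usage sketch for the constructor above; "MemoryMap" is a
# hypothetical name for the enclosing class, and the .value attribute on the
# address.address cells is an assumption, not confirmed API.
mem = MemoryMap(0x12fe)          # fill the map with the 16-bit word 0x12fe
print(mem.map[0].value)          # high byte of the fill word -> 0x12 (assumed attribute)
print(mem.map[1].value)          # low byte of the fill word  -> 0xfe (assumed attribute)
print(mem.map[-2].value, mem.map[-1].value)  # footer bytes forced to 0xff, 0xfc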
def loadModule(self, moduleName):
    try:
        if moduleName == 'nospam':
            import nospam
            modObject = nospam.nospam(self)
        elif moduleName == 'noflood':
            import noflood
            modObject = noflood.noflood(self)
        elif moduleName == 'address':
            import address
            modObject = address.address(self)
        elif moduleName == 'akick':
            import akick
            modObject = akick.akick(self)
        elif moduleName == 'nobots':
            import nobots
            modObject = nobots.nobots(self)
        elif moduleName == 'noinsult':
            import noinsult
            modObject = noinsult.noinsult(self)
        else:
            self.logger.log(
                0, 'DEBUG',
                u'[loadModule function]: Module %s does not exist.' % moduleName)
            return False
    except ImportError:
        return False
    self.moduleList[moduleName] = modObject
    return True
def execute(message):
    msg = message.decode("utf-8").strip()
    lista = msg.split()
    print(lista)
    command = lista[0].upper()
    if len(lista) > 1 and command in ("INSERT", "QUERY"):
        key = int(lista[1])
        location = address(key, Bucket.fs)
        if location != Bucket.bucketNbr:
            return Bucket.forward(location, msg)
    try:
        if command == "INSERT":
            response = Bucket.insert(int(lista[1]), lista[2])
            if len(lista) > 3 and lista[3] == "FWD":
                response = "IAM {}".format(Bucket.fs.extent)
            return response
        elif command == "QUERY":
            return Bucket.query(int(lista[1]))
        elif command == "POPULATION":
            Bucket.population(lista)
            return "ACK"
        elif command == "REHASH":
            Bucket.fs = FileState(int(lista[1]))
            Bucket.rehash(Bucket.fs)
            return "ACK"
        elif command == "SHOW":
            return Bucket.show()
        else:  # TODO
            return "NOPE"
    except KeyError:
        return "key error"
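# The address() helper used above is not shown in this snippet. A minimal
# sketch of what it might look like, assuming FileState exposes the current
# bucket count as .extent (as the REHASH branch and the "IAM {}" reply
# suggest); this is an assumption, not the project's actual routing function:
def address(key, fs):
    # route a key to a bucket number by hashing over the current extent
    return key % fs.extent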
def __init__(self):
    self.firstName = "Yan"
    self.lastName = "Carvalho Borges"
    self.birth = "21/03/1997"
    self.gender = "Male"
    self.address = address('Brasil', 'Cruzeiro', 'Rua Maria José Tabaco', '170')
    self.occupation = None
def main(): last = "Trentwood OR 94701" delivery = "1402 SW Alder st." second = "APT 11" testAddress = address(last, delivery, second) testRecipient = "John Doe" last2 = "Oakland CA 94501" delivery2 = "1244 Broadway st." second2 = "APT 11" testAddress2 = address(last2, delivery2, second2) testRecipient2 = "Lucas Rondenet" testContact = contact(testRecipient) testContact2 = contact(testRecipient2) testEmail = "*****@*****.**" testEmail2 = "*****@*****.**" testPhoneNumber = "542-345-6745" testEmail3 = "*****@*****.**" testEmail4 = "*****@*****.**" testPhoneNumber2 = "545-565-7889" testContact.addAddress(testAddress) testContact.addEmail(testEmail) testContact.addEmail(testEmail2) testContact.addPhoneNumber(testPhoneNumber) testContact2.addAddress(testAddress2) testContact2.addEmail(testEmail3) testContact2.addEmail(testEmail4) testContact2.addPhoneNumber(testPhoneNumber2) print(testContact) #testContact.removeAddress(testAddress) print(testContact) print print(testContact2) testContact.addField("age", "32") testContact3 = contact("John Newhall") print(testContact.age) print(vars(testContact)) print(vars(testContact2)) print(vars(testContact3))
def setUp(self):
    from address import address
    if self.adr_svc is None:
        self.adr_svc = address()
    if len(self.adrs) == 0:
        self.adrs = \
            self.adr_svc.list_addresses({
                'address': None,
                'stage': 'dev'
            })
def get_addresses(self):
    try:
        addresses = self._data["results"][0]["addresses"]
        valid_addresses = []
        for add in addresses:
            if add["address_purpose"] == "LOCATION":
                valid_addresses.append(address(add))
        return valid_addresses
    except (KeyError, IndexError):
        # no results or an unexpected payload shape; nothing to return
        return None
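# Illustrative shape of self._data assumed by get_addresses() above; the key
# names mirror the lookups in the code, the values are placeholders:
sample_data = {
    "results": [{
        "addresses": [
            {"address_purpose": "LOCATION", "city": "..."},  # kept
            {"address_purpose": "MAILING", "city": "..."},   # skipped
        ]
    }]
}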
def addressBookImport(self, fileName, app):
    if os.path.exists(fileName):
        with open(fileName, 'r') as f:
            data = utils.importParse(f)
        for element in data:
            newContact = contact(element['Recipient'])
            newAddress = address(element['Last'], element['Delivery'],
                                 element['Second'])
            newContact.addAddress(newAddress)
            if 'Phone' in element:
                newContact.addPhoneNumber(element['Phone'])
            if 'Email' in element:
                newContact.addEmail(element['Email'])
            self.addContact(newContact)
        app.cmdUpdateListbox(self.contacts)
def main(): last = "San Diego OR 94501" delivery = "1402 SW Alder st." second = "APT 11" test = address(last, delivery, second) print(test) print(test.addressNumber) print(test.address) print(test.city) print(test.state) print(test.zip) last2 = "Oakland CA 94501" delivery2 = "1235 Broadway st." second2 = " " test2 = address(last2, delivery2, second2) print(test2) print(test2.addressNumber) print(test2.address) print(test2.city) print(test2.state) print(test2.zip) '''test3 = address(last2, delivery2, second2)
def cmdAdd(self):
    # create a new contact based on the contact's name
    self.tempContact = contact.contact(self.entryName.get())
    # build a temp address
    self.tempAddress = address.address(self.entryAddressLast.get(),
                                       self.entryAddressDelivery.get(),
                                       self.entryAddressSecond.get())
    # add the address to the contact object
    self.tempContact.addAddress(self.tempAddress)
    # add the email address to the contact object
    self.tempContact.addEmail(self.entryEmail.get())
    # get and add the phone number to the contact object
    self.tempContact.addPhoneNumber(self.entryPhone.get())
    # add the contact to the address book
    self.logic.addContact(self.tempContact)
    # update the listbox
    self.cmdUpdateListbox(self.logic.contacts)
    # set the addressbook state to unsaved
    self.unSavedChanges = 1
def match_algo(nric, req_location, date, startTime, endTime, req_num_kids, req_pay_amt):
    score = 0
    cs = CSV_reader()
    cs.read_file()
    weighted_users = []
    time_date = {}
    # print(date)
    # print(cs.users)
    for i in range(len(date)):
        current_date = date[i].split('-')
        int_date = list(map(int, current_date))
        day_of_week = str(datetime.date(int_date[0], int_date[1], int_date[2]).weekday() + 1)
        duration = startTime[i] + '-' + endTime[i]
        if day_of_week in time_date.keys():
            time_date[day_of_week].append(duration)
        else:
            time_date[day_of_week] = []
            time_date[day_of_week].append(duration)
    for u in cs.users:
        if u.id == nric:
            continue
        num_kids = numOfKids(req_num_kids, u.n_kids)
        req_pay_amt = float(req_pay_amt)
        pay_amt = payment(req_pay_amt, u.min_amt)
        # print(time_date)
        # print(u.time_available)
        time_matched = timeMatching(time_date, u.time_available)
        distance = address(u.location, req_location)
        if time_matched == 0 or num_kids == 0:
            continue
        score = 0.5 * float(time_matched) + 0.3 * float(distance) + 0.2 * float(pay_amt)
        weighted_users.append([u.id, score])
    sorted_weighted_users = sorted(weighted_users, key=itemgetter(1), reverse=True)
    print(sorted_weighted_users)
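# Worked example of the weighting above, with illustrative component values:
#   time_matched = 1.0, distance = 0.8, pay_amt = 0.5
#   score = 0.5*1.0 + 0.3*0.8 + 0.2*0.5 = 0.5 + 0.24 + 0.1 = 0.84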
def rehash(fs):
    print("Rehashing")
    print(fs)
    deleteList = []
    for key in Bucket.dicc:
        location = address(key, fs)
        if location == Bucket.bucketNbr:
            print("No need to rehash for key {}".format(key))
            continue
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if location not in Bucket.bucketList:
            try:
                # Connect to server and send data
                sock.connect((Bucket.coHost, Bucket.coPort))
                data = "POPULATE"
                sock.sendall(bytes(data + "\n", "utf-8"))
                # Receive data from the server
                received = str(sock.recv(1024), "utf-8")
            finally:
                # Close connection
                sock.close()
            # process reply
            reply = received.split()
            if reply[0] == "POPULATION":
                Bucket.population(reply)
        destAddress = Bucket.bucketList[location].split()
        destHost, destPort = destAddress[0], int(destAddress[1])
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((destHost, destPort))
            data = "INSERT {} {}".format(key, Bucket.dicc[key])
            sock.sendall(bytes(data + "\n", "utf-8"))
            received = str(sock.recv(1024), "utf-8")
            if received == "ACK":
                deleteList.append(key)
        finally:
            # Close connection
            sock.close()
        print(received)
    for key in deleteList:
        Bucket.dicc.pop(key)
def main():
    testAddressBook = addressbook()
    testContact1 = contact("John Doe")
    testLast1 = "San Diego CA 94501"
    testDelivery1 = "1401 SW Main St."
    testSecond1 = ""
    testAddr1 = address(testLast1, testDelivery1, testSecond1)
    testEmail1 = "*****@*****.**"
    testEmail2 = "*****@*****.**"
    testPhoneNumber1 = "542-345-6745"
    testContact1.addAddress(testAddr1)
    testContact1.addEmail(testEmail1)
    testContact1.addEmail(testEmail2)
    testContact1.addPhoneNumber(testPhoneNumber1)
    print(testContact1)
    print(testContact1.mailingFormat())
    #print(testContact1.city)
    #print(testContact1.state)
    #print(testContact1.zip)
    # testContact2 = contact("Mary Sue")
def execute(message):
    msg = message.decode("utf-8").strip()
    lista = msg.split()
    print(lista)
    command = lista[0].upper()
    if len(lista) > 1 and command in ("INSERT", "QUERY"):
        key = int(lista[1])
        location = address(key, Bucket.fs)
        if location != Bucket.bucketNbr:
            return Bucket.forward(location, msg)
    try:
        if command == "INSERT":
            if len(lista) < 3:
                return "Invalid Command."
            else:
                return Coordinator.insert(int(lista[1]), lista[2])
                #return "ACK"
        elif command == "QUERY":
            if len(lista) < 2:
                return "Invalid Command."
            else:
                return Bucket.query(int(lista[1]))
        elif command == "REGISTER":
            bucketNbr = Coordinator.totalBuckets
            Coordinator.totalBuckets += 1
            Bucket.bucketList[bucketNbr] = "{0} {1}".format(lista[1], lista[2])
            print(Bucket.bucketList)
            return "{}".format(bucketNbr)
        elif command == "POPULATE":
            return "POPULATION " + ' '.join("{} {}".format(k, v) for k, v in Bucket.bucketList.items())
        elif command == "SPLIT":
            return Coordinator.split()
        elif command == "SHOW":
            return Coordinator.show()
        else:
            return "NOPE"
    except KeyError:
        return "key error"
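# Illustrative exchange handled by the two execute() functions above
# (coordinator here, bucket earlier); the exact replies depend on
# Bucket.insert/query, so treat this as a sketch of the wire protocol:
#   client -> "REGISTER <host> <port>"   coordinator replies with a bucket number
#   client -> "INSERT 42 hello"          routed via address(42, Bucket.fs)
#   client -> "QUERY 42"                 replies with the stored value
#   client -> "POPULATE"                 replies "POPULATION <nbr> <host> <port> ..."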
def parse(self, xml_string, counters, input_file_name, curs):
    url = '{http://clarivate.com/schema/wok5.27/public/FullRecord}'
    try:
        root = ET.fromstring(xml_string)
        for REC in root:
            # parse publications and create a publication object containing all the attributes of a publication
            new_pub = pub.publication()
            # old method commented
            # r_publication = dict()
            # Counter class to generate surrogate ids, temporary for now; later they will be replaced by auto-incremental columns in the database
            counters.r_publication_seq += 1
            new_pub.id = counters.r_publication_seq
            # Finding UID in the xml by finding the UID tag inside a record
            new_pub.source_id = REC.find(url + 'UID').text
            pub_info = REC.find('.//' + url + 'pub_info')
            new_pub.source_type = pub_info.get('pubtype')
            source_title = REC.find('.//' + url + "title[@type='source']")
            if source_title is not None:
                if source_title.text is not None:
                    new_pub.source_title = source_title.text.encode('utf-8')
            # extracting values from properties of the pub_info tag in the XML
            new_pub.has_abstract = pub_info.get('has_abstract')
            new_pub.publication_year = pub_info.get('pubyear')
            new_pub.issue = pub_info.get('issue')
            new_pub.volume = pub_info.get('vol')
            new_pub.pubmonth = pub_info.get('pubmonth')
            new_pub.publication_date = pub_info.get('sortdate')
            new_pub.coverdate = pub_info.get('coverdate')
            page_info = pub_info.find(url + 'page')
            if page_info is not None:
                new_pub.begin_page = page_info.get('begin')
                new_pub.end_page = page_info.get('end')
            document_title = REC.find('.//' + url + "title[@type='item']")
            if document_title is not None:
                if document_title.text is not None:
                    new_pub.document_title = document_title.text.encode('utf-8')
            document_type = REC.find('.//' + url + 'doctype')
            if document_type is not None:
                if document_type.text is not None:
                    new_pub.document_type = document_type.text
            publisher_name = REC.find('.//' + url + "name[@role='publisher']")
            if publisher_name is not None:
                pub_name = publisher_name.find('.//' + url + 'full_name')
                if pub_name is not None:
                    if pub_name.text is not None:
                        new_pub.publisher_name = pub_name.text.encode('utf-8')
            pub_address_no = REC.find('.//' + url + "address_spec[@addr_no='1']")
            if pub_address_no is not None:
                publisher_address = pub_address_no.find('.//' + url + 'full_address')
                if publisher_address is not None:
                    if publisher_address.text is not None:
                        new_pub.publisher_address = publisher_address.text.encode('utf-8')
            # r_publication['language'] = ''
            languages = REC.find('.//' + url + 'languages')
            if languages is not None:
                language = languages.find('.//' + url + 'language')
                if language is not None:
                    if language.text is not None:
                        new_pub.language = language.text.encode('utf-8')
            new_pub.edition = REC.find('.//' + url + 'edition').get('value')
            new_pub.source_filename = input_file_name
            new_pub.created_date = datetime.date.today()
            new_pub.last_modified_date = datetime.date.today()
            ## query to insert a publication record into the publications table in the database
            ## The query may be written into a separate file in the future, from where it is read in the form of a string, amended with values and executed, to make the code look better
            curs.execute(
                "INSERT INTO wos_publications(begin_page, created_date, document_title, document_type,edition, end_page,has_abstract,id,issue,language,last_modified_date,publication_date,publication_year,publisher_address,publisher_name,source_filename,source_id,source_title,source_type,volume)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id) DO UPDATE SET begin_page = excluded.begin_page, created_date = excluded.created_date,document_title = excluded.document_title, document_type = excluded.document_type, edition = excluded.edition,end_page = excluded.end_page, has_abstract = excluded.has_abstract, id = excluded.id, issue = excluded.issue,language = excluded.language, last_modified_date = excluded.last_modified_date,publication_date = excluded.publication_date, publication_year = excluded.publication_year,publisher_address = excluded.publisher_address, publisher_name = excluded.publisher_name,source_filename = excluded.source_filename, source_id = excluded.source_id, source_title = excluded.source_title,source_type = excluded.source_type, volume = excluded.volume;",
                (str(new_pub.begin_page), new_pub.created_date, str(new_pub.document_title),
                 str(new_pub.document_type), str(new_pub.edition), str(new_pub.end_page),
                 str(new_pub.has_abstract), str(new_pub.id), str(new_pub.issue),
                 str(new_pub.language), new_pub.last_modified_date, new_pub.publication_date,
                 str(new_pub.publication_year), str(new_pub.publisher_address),
                 str(new_pub.publisher_name), str(new_pub.source_filename),
                 str(new_pub.source_id), str(new_pub.source_title), new_pub.source_type,
                 str(new_pub.volume)))
            ## old code for writing the publications data into a CSV file
            '''writer_pub.writerow((r_publication['id'], r_publication['source_id'], \
                r_publication['source_type'], r_publication['source_title'], \
                r_publication['language'], r_publication['document_title'], \
                r_publication['document_type'], r_publication['has_abstract'], \
                r_publication['issue'], r_publication['volume'], \
                r_publication['begin_page'], r_publication['end_page'], \
                r_publication['publisher_name'], r_publication['publisher_address'], \
                r_publication['publication_year'], r_publication['publication_date'], \
                r_publication['created_date'], r_publication['last_modified_date'], \
                r_publication['edition'], r_publication['source_filename']))'''
            # parse grants in funding acknowledgements for each publication
            # old method of creating a dict type
            # r_grant = dict()
            # New method of creating an object to store everything in the form of proper objects, which could be developed into classes having their own properties in future
            r_grant = grant.grant()
            r_grant.source_id = new_pub.source_id
            # r_grant.funding_ack = ''
            FUNDING_ACK = REC.find('.//' + url + 'fund_text')
            if FUNDING_ACK is not None:
                # if a funding acknowledgement exists, then extract the r_grant(s) data
                funding_ack_p = FUNDING_ACK.find('.//' + url + 'p')
                if funding_ack_p is not None:
                    if funding_ack_p.text is not None:
                        r_grant.funding_ack = funding_ack_p.text.encode('utf-8')
            # looping through all the r_grant tags
            for l_grant in REC.findall('.//' + url + 'grant'):
                # r_grant.grant_agency = ''
                grant_agency = l_grant.find('.//' + url + 'grant_agency')
                if grant_agency is not None:
                    if grant_agency.text is not None:
                        r_grant.grant_agency = grant_agency.text.encode('utf-8')
                grant_ids = l_grant.find('.//' + url + 'grant_ids')
                if grant_ids is not None:
                    for grant_id in grant_ids.findall('.//' + url + 'grant_id'):
                        counters.r_grant_seq = counters.r_grant_seq + 1
                        r_grant.id = counters.r_grant_seq
                        # r_grant.grant_number = ''
                        if grant_id is not None:
                            if grant_id.text is not None:
                                r_grant.grant_number = grant_id.text.encode('utf-8')
                        if r_grant.funding_ack is not None:
                            # insert the grant details in the grants table if there is any funding acknowledgement in the records
                            curs.execute(
                                "INSERT INTO wos_grants(id,source_id,grant_number,grant_organization,funding_ack,source_filename)VALUES(%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id, grant_number, grant_organization) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, grant_number = excluded.grant_number,grant_organization = excluded.grant_organization, funding_ack = excluded.funding_ack,source_filename = excluded.source_filename;",
                                (str(r_grant.id), str(r_grant.source_id), str(r_grant.grant_number),
                                 str(r_grant.grant_agency), str(r_grant.funding_ack),
                                 str(new_pub.source_filename)))
                            '''writer_grant.writerow((r_grant['id'],r_grant['source_id'],\
                                r_grant['grant_number'],r_grant['grant_agency'],\
                                r_grant['funding_ack'],\
                                r_publication['source_filename']))'''
            # insert code to insert record in r_grant table
            # parse document object identifiers for each publication
            r_dois = dois.dois()
            r_dois.source_id = new_pub.source_id
            IDS = REC.find('.//' + url + 'identifiers')
            if IDS is not None:
                for identifier in IDS.findall('.//' + url + 'identifier'):
                    # r_dois['doi'] = None
                    id_value = identifier.get('value')
                    if id_value is not None:
                        r_dois.doi = id_value.encode('utf-8')
                    # r_dois['doi_type'] = ''
                    id_type = identifier.get('type')
                    if id_type is not None:
                        r_dois.doi_type = id_type.encode('utf-8')
                    # write each doi to the CSV file for the wos_document_identifiers table
                    if r_dois.doi is not None:
                        counters.r_doi_seq = counters.r_doi_seq + 1
                        r_dois.id = counters.r_doi_seq
                        # inserting records into the wos_document_identifiers table
                        curs.execute(
                            "INSERT INTO wos_document_identifiers(id,source_id,document_id,document_id_type,source_filename)VALUES(%s,%s,%s,%s,%s) ON CONFLICT (source_id, document_id_type, document_id) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, document_id = excluded.document_id,document_id_type = excluded.document_id_type, source_filename = excluded.source_filename;",
                            (str(r_dois.id), str(r_dois.source_id), str(r_dois.doi),
                             str(r_dois.doi_type), str(new_pub.source_filename)))
                        '''writer_dois.writerow((r_dois['id'], r_dois['source_id'], \
                            r_dois['doi'], r_dois['doi_type'], \
                            r_publication['source_filename']))'''
            # parse keywords for each publication
            keywords = REC.find('.//' + url + 'keywords_plus')
            if keywords is not None:
                r_keyword = key_word.keyword()
                r_keyword.source_id = new_pub.source_id
                for keyword in keywords.findall('.//' + url + 'keyword'):
                    if keyword is not None:
                        if keyword.text is not None:
                            r_keyword.keyword = keyword.text.encode('utf-8')
                            counters.r_keyword_seq = counters.r_keyword_seq + 1
                            r_keyword.id = counters.r_keyword_seq
                            # inserting records in wos_keywords
                            curs.execute(
                                "INSERT INTO wos_keywords(id,source_id,keyword,source_filename)VALUES(%s,%s,%s,%s)ON CONFLICT (source_id, keyword) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, keyword = excluded.keyword,source_filename = excluded.source_filename;",
                                (str(r_keyword.id), str(r_keyword.source_id),
                                 str(r_keyword.keyword), str(new_pub.source_filename)))
                            # old code for inserting data into a text file
                            '''writer_keyword.writerow((r_keyword['id'], \
                                r_keyword['source_id'], r_keyword['keyword'], \
                                r_publication['source_filename']))'''
            # parse abstract for each publication
            if new_pub.has_abstract == 'Y':
                abstracts = REC.find('.//' + url + 'abstracts')
                if abstracts is not None:
                    r_abst = abst.abstract()
                    r_abst.source_id = new_pub.source_id
                    r_abstract_text = ''
                    for abstract_text in abstracts.findall('.//' + url + 'p'):
                        if abstract_text is not None:
                            if abstract_text.text is not None:
                                if r_abstract_text:
                                    # separate paragraphs with a blank line
                                    r_abstract_text = r_abstract_text + '\n\n'
                                r_abstract_text = r_abstract_text + abstract_text.text.encode('utf-8')
                    # adding all the abstract paragraphs into one before writing it into the database
                    r_abst.abstract_text = r_abstract_text
                    # old code
                    # r_abst['abstract_text'] = abstract_text.text.\
                    #     encode('utf-8')
                    # r_abstract_seq += 1
                    # r_abst['id'] = r_abstract_seq
                    # writer_abstract.writerow((r_abst['id'],\
                    #     r_abst['source_id'],r_abst['abstract_text'],\
                    #     r_publication['source_filename']))
                    # writing the abstracts record into the database
                    curs.execute(
                        "INSERT INTO wos_abstracts(source_id,abstract_text,source_filename)VALUES(%s,%s,%s) ON CONFLICT (source_id) DO UPDATE SET source_id = excluded.source_id, abstract_text = excluded.abstract_text, source_filename = excluded.source_filename;",
                        (str(r_abst.source_id), str(r_abst.abstract_text),
                         str(new_pub.source_filename)))
                    '''writer_abstract.writerow(
                        (r_abst['source_id'], r_abst['abstract_text'],
                         r_publication['source_filename']))'''
            # parse addresses for each publication
            r_addr = add.address()
            # r_addr.id = {}
            # r_addr.source_id = {}
            # r_addr['addr_name'] = {}
            # r_addr['organization'] = {}
            # r_addr['suborganization'] = {}
            # r_addr['city'] = {}
            # r_addr['country'] = {}
            # r_addr['zip'] = {}
            addr_no_list = []
            addresses = REC.find('.//' + url + 'addresses')
            for addr in addresses.findall('.//' + url + 'address_spec'):
                addr_ind = addr.get('addr_no')
                if addr_ind is None:
                    addr_ind = 0
                else:
                    addr_ind = int(addr_ind)
                # Keep all addr_no for the following reference by authors
                addr_no_list.append(int(addr_ind))
                r_addr.source_id[addr_ind] = new_pub.source_id
                r_addr.addr_name[addr_ind] = ''
                addr_name = addr.find('.//' + url + 'full_address')
                if addr_name is not None:
                    if addr_name.text is not None:
                        r_addr.addr_name[addr_ind] = addr_name.text.encode('utf-8')
                r_addr.organization[addr_ind] = ''
                organization = addr.find('.//' + url + "organization[@pref='Y']")
                if organization is not None:
                    if organization.text is not None:
                        r_addr.organization[addr_ind] = organization.text.encode('utf-8')
                r_addr.sub_organization[addr_ind] = ''
                suborganization = addr.find('.//' + url + 'suborganization')
                if suborganization is not None:
                    if suborganization.text is not None:
                        r_addr.sub_organization[addr_ind] = suborganization.text.encode('utf-8')
                r_addr.city[addr_ind] = ''
                city = addr.find('.//' + url + 'city')
                if city is not None:
                    if city.text is not None:
                        r_addr.city[addr_ind] = city.text.encode('utf-8')
                r_addr.country[addr_ind] = ''
                country = addr.find('.//' + url + 'country')
                if country is not None:
                    if country.text is not None:
                        r_addr.country[addr_ind] = country.text.encode('utf-8')
                r_addr.zip_code[addr_ind] = ''
                addr_zip = addr.find('.//' + url + 'zip')
                if addr_zip is not None:
                    if addr_zip.text is not None:
                        r_addr.zip_code[addr_ind] = addr_zip.text.encode('utf-8')
                if r_addr.addr_name[addr_ind] is not None:
                    counters.r_addr_seq += 1
                    r_addr.id[addr_ind] = counters.r_addr_seq
                    # inserting address records into the database
                    curs.execute(
                        "INSERT INTO wos_addresses(id,source_id,address_name,organization,sub_organization,city,country,zip_code,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT (source_id, address_name) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, address_name = excluded.address_name,organization = excluded.organization, sub_organization = excluded.sub_organization, city = excluded.city,country = excluded.country, zip_code = excluded.zip_code, source_filename = excluded.source_filename;",
                        (str(r_addr.id[addr_ind]), str(r_addr.source_id[addr_ind]),
                         str(r_addr.addr_name[addr_ind]), str(r_addr.organization[addr_ind]),
                         str(r_addr.sub_organization[addr_ind]), str(r_addr.city[addr_ind]),
                         str(r_addr.country[addr_ind]), str(r_addr.zip_code[addr_ind]),
                         str(new_pub.source_filename)))
                    '''writer_address.writerow((r_addr['id'][addr_ind], \
                        r_addr['source_id'][addr_ind], r_addr['addr_name'][addr_ind], \
                        r_addr['organization'][addr_ind], \
                        r_addr['suborganization'][addr_ind], r_addr['city'][addr_ind], \
                        r_addr['country'][addr_ind], r_addr['zip'][addr_ind], \
                        r_publication['source_filename']))'''
            # parse titles for each publication
            r_title = ti.title()
            r_title.source_id = new_pub.source_id
            r_title.id = counters.r_title_seq
            summary = REC.find('.//' + url + 'summary')
            if summary is not None:
                titles = summary.find('.//' + url + 'titles')
                if titles is not None:
                    for title in titles.findall('.//' + url + 'title'):
                        if title is not None:
                            if title.text is not None:
                                r_title.title = title.text.encode('utf-8')
                                r_title.type = title.get('type')
                                r_title.id += 1
                                # inserting titles into the database
                                curs.execute(
                                    "INSERT INTO wos_titles(id,source_id,title,type,source_filename)VALUES(%s,%s,%s,%s,%s)ON CONFLICT (source_id, type) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, title = excluded.title, type = excluded.type,source_filename = excluded.source_filename;",
                                    (str(r_title.id), str(r_title.source_id), str(r_title.title),
                                     str(r_title.type), str(new_pub.source_filename)))
                                '''writer_title.writerow((r_title['id'], \
                                    r_title['source_id'], r_title['title'], \
                                    r_title['type'], r_publication['source_filename']))'''
            # parse authors for each publication
            r_author = auth.author()
            r_author.source_id = new_pub.source_id
            summary = REC.find('.//' + url + 'summary')
            names = summary.find(url + 'names')
            for name in names.findall(url + "name[@role='author']"):
                # for name in REC.findall('.//'+url+"name[@role='author']"):
                # r_author.full_name = ''
                full_name = name.find(url + 'full_name')
                if full_name is not None:
                    if full_name.text is not None:
                        r_author.full_name = full_name.text.encode('utf-8')
                # r_author['wos_standard'] = ''
                wos_standard = name.find(url + 'wos_standard')
                if wos_standard is not None:
                    if wos_standard.text is not None:
                        r_author.wos_standard = wos_standard.text.encode('utf-8')
                r_author.first_name = ''
                first_name = name.find(url + 'first_name')
                if first_name is not None:
                    if first_name.text is not None:
                        r_author.first_name = first_name.text.encode('utf-8')
                # r_author.last_name = ''
                last_name = name.find(url + 'last_name')
                if last_name is not None:
                    if last_name.text is not None:
                        r_author.last_name = last_name.text.encode('utf-8')
                # r_author['email_addr'] = ''
                email_addr = name.find(url + 'email_addr')
                if email_addr is not None:
                    if email_addr.text is not None:
                        r_author.email_addr = email_addr.text.encode('utf-8')
                r_author.seq_no = name.get('seq_no')
                r_author.dais_id = name.get('dais_id')
                r_author.r_id = name.get('r_id')
                addr_seqs = name.get('addr_no')
                # r_author.address = ''
                r_author.address_id = ''
                r_author.addr_seq = ''
                if addr_seqs is not None:
                    addr_no_str = addr_seqs.split(' ')
                    for addr_seq in addr_no_str:
                        if addr_seq is not None:
                            addr_index = int(addr_seq)
                            if addr_index in addr_no_list:
                                r_author.address = r_addr.addr_name[addr_index]
                                r_author.address_id = r_addr.id[addr_index]
                                r_author.addr_seq = addr_seq
                                counters.r_author_seq += 1
                                r_author.id = counters.r_author_seq
                                # inserting records into the authors table of the database
                                curs.execute(
                                    "INSERT INTO wos_authors(id,source_id,full_name,last_name,first_name,seq_no,address_seq,address,email_address,address_id,dais_id,r_id,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id, seq_no, address_id) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, full_name = excluded.full_name,last_name = excluded.last_name, first_name = excluded.first_name, seq_no = excluded.seq_no,address_seq = excluded.address_seq, address = excluded.address, email_address = excluded.email_address,address_id = excluded.address_id, dais_id = excluded.dais_id, r_id = excluded.r_id,source_filename = excluded.source_filename;",
                                    (str(r_author.id), str(r_author.source_id), str(r_author.full_name),
                                     str(r_author.last_name), str(r_author.first_name), str(r_author.seq_no),
                                     str(r_author.addr_seq), str(r_author.address), str(r_author.email_addr),
                                     str(r_author.address_id), str(r_author.dais_id), str(r_author.r_id),
                                     str(new_pub.source_filename)))
                                '''writer_author.writerow((r_author['id'], \
                                    r_author['source_id'], r_author['full_name'], \
                                    r_author['last_name'], r_author['first_name'], \
                                    r_author['seq_no'], r_author['addr_seq'], \
                                    r_author['address'], r_author['email_addr'], \
                                    r_author['address_id'], r_author['dais_id'], \
                                    r_author['r_id'], r_publication['source_filename']))'''
                else:
                    counters.r_author_seq += 1
                    r_author.id = counters.r_author_seq
                    r_author.address_id = 0
                    r_author.addr_seq = 0
                    # inserting records into the author tables of the database
                    curs.execute(
                        "INSERT INTO wos_authors(id,source_id,full_name,last_name,first_name,seq_no,email_address,dais_id,r_id,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT (source_id, seq_no, address_id) DO UPDATE SET id = excluded.id, source_id = excluded.source_id, full_name = excluded.full_name,last_name = excluded.last_name, first_name = excluded.first_name, seq_no = excluded.seq_no,address_seq = excluded.address_seq, address = excluded.address, email_address = excluded.email_address,address_id = excluded.address_id, dais_id = excluded.dais_id, r_id = excluded.r_id,source_filename = excluded.source_filename;",
                        (str(r_author.id), str(r_author.source_id), str(r_author.full_name),
                         str(r_author.last_name), str(r_author.first_name), str(r_author.seq_no),
                         str(r_author.email_addr), str(r_author.dais_id), str(r_author.r_id),
                         str(new_pub.source_filename)))
                    '''writer_author.writerow((r_author['id'], r_author['source_id'], \
                        r_author['full_name'], r_author['last_name'], \
                        r_author['first_name'], r_author['seq_no'], \
                        r_author['addr_seq'], r_author['address'], \
                        r_author['email_addr'], r_author['address_id'], \
                        r_author['dais_id'], r_author['r_id'], \
                        r_publication['source_filename']))'''
            # parse reference data for each publication
            REFERENCES = REC.find('.//' + url + 'references')
            for ref in REFERENCES.findall('.//' + url + 'reference'):
                # print "inside reference"
                r_reference = reference.reference()
                r_reference.source_id = new_pub.source_id
                r_reference.cited_source_uid = None
                cited_source_id = ref.find('.//' + url + 'uid')
                if cited_source_id is not None:
                    if cited_source_id.text is not None:
                        r_reference.cited_source_uid = cited_source_id.text.encode('utf-8')
                # r_reference['cited_title'] = ''
                cited_title = ref.find('.//' + url + 'citedTitle')
                if cited_title is not None:
                    if cited_title.text is not None:
                        r_reference.cited_title = cited_title.text.encode('utf-8')
                r_reference.cited_work = ''
                cited_work = ref.find('.//' + url + 'citedWork')
                if cited_work is not None:
                    if cited_work.text is not None:
                        r_reference.cited_work = cited_work.text.encode('utf-8')
                # r_reference['cited_author'] = ''
                cited_author = ref.find('.//' + url + 'citedAuthor')
                if cited_author is not None:
                    if cited_author.text is not None:
                        r_reference.cited_author = cited_author.text.encode('utf-8')[:299]
                # r_reference['cited_year'] = ''
                cited_year = ref.find('.//' + url + 'year')
                if cited_year is not None:
                    if cited_year.text is not None:
                        r_reference.cited_year = cited_year.text.encode('utf-8')
                # r_reference.cited_page = ''
                cited_page = ref.find('.//' + url + 'page')
                if cited_page is not None:
                    if cited_page.text is not None:
                        r_reference.cited_page = cited_page.text.encode('utf-8')
                r_reference.created_date = new_pub.created_date
                r_reference.last_modified_date = new_pub.last_modified_date
                if r_reference.cited_source_uid is not None:
                    counters.r_reference_seq = counters.r_reference_seq + 1
                    r_reference.id = counters.r_reference_seq
                    # inserting references into the database
                    curs.execute(
                        "INSERT INTO wos_references(wos_reference_id,source_id,cited_source_uid,cited_title,cited_work,cited_author,cited_year,cited_page,created_date,last_modified_date,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT ON CONSTRAINT wos_references_pk DO UPDATE SET source_id = excluded.source_id, cited_source_uid = excluded.cited_source_uid,cited_title = excluded.cited_title, cited_work = excluded.cited_work, cited_author = excluded.cited_author,cited_year = excluded.cited_year, cited_page = excluded.cited_page, created_date = excluded.created_date,last_modified_date = excluded.last_modified_date, source_filename = excluded.source_filename;",
                        (str(r_reference.id), str(r_reference.source_id),
                         str(r_reference.cited_source_uid), str(r_reference.cited_title),
                         str(r_reference.cited_work), str(r_reference.cited_author),
                         str(r_reference.cited_year), str(r_reference.cited_page),
                         str(r_reference.created_date), str(r_reference.last_modified_date),
                         str(new_pub.source_filename)))
                    '''writer_ref.writerow((r_reference['id'], r_reference['source_id'], \
                        r_reference['cited_source_id'], r_reference['cited_title'], \
                        r_reference['cited_work'], r_reference['cited_author'], \
                        r_reference['cited_year'], r_reference['cited_page'], \
                        r_reference['created_date'], r_reference['last_modified_date'], \
                        r_publication['source_filename']))'''
            '''print "Processed", r_publication_seq, "records from", input_csv_dir + input_filename
            copy_command = "\\copy new_wos_publications from '" + xml_csv_dir + input_filename[
                :-4] + "_publication.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_references from '" + xml_csv_dir + input_filename[
                :-4] + "_reference.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_grants from '" + xml_csv_dir + input_filename[
                :-4] + "_grant.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_addresses from '" + xml_csv_dir + input_filename[
                :-4] + "_address.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_authors from '" + xml_csv_dir + input_filename[
                :-4] + "_author.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_document_identifiers from '" + xml_csv_dir + input_filename[
                :-4] + "_dois.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_abstracts from '" + xml_csv_dir + input_filename[
                :-4] + "_abstract.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_keywords from '" + xml_csv_dir + input_filename[
                :-4] + "_keyword.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            copy_command = "\\copy new_wos_titles from '" + xml_csv_dir + input_filename[
                :-4] + "_title.csv'" + " delimiter ',' CSV;\n"
            csvfile_load.write((copy_command))
            # Close all opened files
            csvfile_publication.close()
            csvfile_reference.close()
            csvfile_abstract.close()
            csvfile_address.close()
            csvfile_author.close()
            csvfile_dois.close()
            csvfile_grant.close()
            csvfile_keyword.close()
            csvfile_title.close()
            csvfile_load.close()
            # print(rec.find(self.url + 'UID').text)'''
            # print('Database connection closed.')
    except ET.ParseError as error:
        print(error)
    return counters
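# A hedged driver sketch for the parse() method above; the Parser class name,
# the Counters container, the psycopg2 DSN and the input file name are
# assumptions used only for illustration, not part of the original code:
import psycopg2

conn = psycopg2.connect("dbname=wos")   # hypothetical connection string
curs = conn.cursor()
with open("records.xml") as f:          # hypothetical input file
    counters = Counters()               # hypothetical surrogate-id container
    counters = Parser().parse(f.read(), counters, "records.xml", curs)
conn.commit()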
def parse(self, xml_string, input_file_name, curs):
    url = '<xml header URL>'
    root = ET.fromstring(xml_string)
    for REC in root:
        # parse publications and create a publication object containing all the attributes of a publication
        new_pub = pub.publication()
        new_pub.source_id = REC.find(url + 'UID').text
        pub_info = REC.find('.//' + url + 'pub_info')
        new_pub.source_type = pub_info.get('pubtype')
        source_title = REC.find('.//' + url + "title[@type='source']")
        if source_title is not None:
            if source_title.text is not None:
                new_pub.source_title = source_title.text.encode('utf-8')
        # extracting values from properties of the pub_info tag in the XML
        new_pub.has_abstract = pub_info.get('has_abstract')
        new_pub.publication_year = pub_info.get('pubyear')
        new_pub.issue = pub_info.get('issue')
        new_pub.volume = pub_info.get('vol')
        new_pub.pubmonth = pub_info.get('pubmonth')
        new_pub.publication_date = pub_info.get('sortdate')
        new_pub.coverdate = pub_info.get('coverdate')
        page_info = pub_info.find(url + 'page')
        if page_info is not None:
            new_pub.begin_page = page_info.get('begin')
            new_pub.end_page = page_info.get('end')
        document_title = REC.find('.//' + url + "title[@type='item']")
        if document_title is not None:
            if document_title.text is not None:
                new_pub.document_title = document_title.text.encode('utf-8')
        document_type = REC.find('.//' + url + 'doctype')
        if document_type is not None:
            if document_type.text is not None:
                new_pub.document_type = document_type.text
        publisher_name = REC.find('.//' + url + "name[@role='publisher']")
        if publisher_name is not None:
            pub_name = publisher_name.find('.//' + url + 'full_name')
            if pub_name is not None:
                if pub_name.text is not None:
                    new_pub.publisher_name = pub_name.text.encode('utf-8')
        pub_address_no = REC.find('.//' + url + "address_spec[@addr_no='1']")
        if pub_address_no is not None:
            publisher_address = pub_address_no.find('.//' + url + 'full_address')
            if publisher_address is not None:
                if publisher_address.text is not None:
                    new_pub.publisher_address = publisher_address.text.encode('utf-8')
        languages = REC.find('.//' + url + 'languages')
        if languages is not None:
            language = languages.find('.//' + url + 'language')
            if language is not None:
                if language.text is not None:
                    new_pub.language = language.text.encode('utf-8')
        new_pub.edition = REC.find('.//' + url + 'edition').get('value')
        new_pub.source_filename = input_file_name
        new_pub.created_date = datetime.date.today()
        new_pub.last_modified_date = datetime.date.today()
        ## query to insert a publication record into the publications table in the database
        ## The query may be written into a separate file in the future, from where it is read in the form of a string, amended with values and executed, to make the code look better
        # TODO Query below is hard to read. I'd try a multi-line string with the proper SQL formatting.
        curs.execute('<query to upsert data in database>')
        # parse grants in funding acknowledgements for each publication
        # New method of creating an object to store everything in the form of proper objects, which could be developed into classes having their own properties in future
        r_grant = grant.grant()
        r_grant.source_id = new_pub.source_id
        # r_grant.funding_ack = ''
        FUNDING_ACK = REC.find('.//' + url + 'fund_text')
        if FUNDING_ACK is not None:
            # if a funding acknowledgement exists, then extract the r_grant(s) data
            funding_ack_p = FUNDING_ACK.find('.//' + url + 'p')
            if funding_ack_p is not None:
                if funding_ack_p.text is not None:
                    r_grant.funding_ack = funding_ack_p.text.encode('utf-8')
        # looping through all the r_grant tags
        for l_grant in REC.findall('.//' + url + 'grant'):
            # r_grant.grant_agency = ''
            grant_agency = l_grant.find('.//' + url + 'grant_agency')
            if grant_agency is not None:
                if grant_agency.text is not None:
                    r_grant.grant_agency = grant_agency.text.encode('utf-8')
            grant_ids = l_grant.find('.//' + url + 'grant_ids')
            if grant_ids is not None:
                for grant_id in grant_ids.findall('.//' + url + 'grant_id'):
                    if grant_id is not None:
                        if grant_id.text is not None:
                            r_grant.grant_number = grant_id.text.encode('utf-8')
                    if r_grant.funding_ack is not None:
                        # insert the grant details in the grants table if there is any funding acknowledgement in the records
                        curs.execute('<query to upsert data in database>')
        # insert code to insert record in r_grant table
        # parse document object identifiers for each publication
        r_dois = dois.dois()
        r_dois.source_id = new_pub.source_id
        IDS = REC.find('.//' + url + 'identifiers')
        if IDS is not None:
            for identifier in IDS.findall('.//' + url + 'identifier'):
                id_value = identifier.get('value')
                if id_value is not None:
                    r_dois.doi = id_value.encode('utf-8')
                id_type = identifier.get('type')
                if id_type is not None:
                    r_dois.doi_type = id_type.encode('utf-8')
                if r_dois.doi is not None:
                    # inserting records into the wos_document_identifiers table
                    curs.execute('<query to upsert data in database>')
        # parse keywords for each publication
        keywords = REC.find('.//' + url + 'keywords_plus')
        if keywords is not None:
            r_keyword = key_word.wos_keyword()
            r_keyword.source_id = new_pub.source_id
            for keyword in keywords.findall('.//' + url + 'keyword'):
                if keyword is not None:
                    if keyword.text is not None:
                        r_keyword.keyword = keyword.text.encode('utf-8')
                        # inserting records in wos_keywords
                        curs.execute('<query to upsert data in database>')
        # parse abstract for each publication
        if new_pub.has_abstract == 'Y':
            abstracts = REC.find('.//' + url + 'abstracts')
            if abstracts is not None:
                r_abst = abst.abstract()
                r_abst.source_id = new_pub.source_id
                r_abstract_text = ''
                for abstract_text in abstracts.findall('.//' + url + 'p'):
                    if abstract_text is not None:
                        if abstract_text.text is not None:
                            if r_abstract_text != '' and abstract_text.text != '':
                                # separate paragraphs with a blank line
                                r_abstract_text = r_abstract_text + '\n\n'
                            r_abstract_text = r_abstract_text + abstract_text.text.encode('utf-8')
                # adding all the abstract paragraphs into one before writing it into the database
                r_abst.abstract_text = re.sub(r"^[\n]+", "", r_abstract_text)
                # writing the abstracts record into the database
                curs.execute('<query to upsert data in database>')
        # parse addresses for each publication
        r_addr = add.address()
        addr_no_list = []
        addresses = REC.find('.//' + url + 'addresses')
        for addr in addresses.findall('.//' + url + 'address_spec'):
            addr_ind = addr.get('addr_no')
            if addr_ind is None:
                addr_ind = 0
            else:
                addr_ind = int(addr_ind)
            # Keep all addr_no for the following reference by authors
            addr_no_list.append(int(addr_ind))
            r_addr.source_id[addr_ind] = new_pub.source_id
            r_addr.addr_name[addr_ind] = ''
            addr_name = addr.find('.//' + url + 'full_address')
            if addr_name is not None:
                if addr_name.text is not None:
                    r_addr.addr_name[addr_ind] = addr_name.text.encode('utf-8')
            r_addr.organization[addr_ind] = ''
            organization = addr.find('.//' + url + "organization[@pref='Y']")
            if organization is not None:
                if organization.text is not None:
                    r_addr.organization[addr_ind] = organization.text.encode('utf-8')
            r_addr.sub_organization[addr_ind] = ''
            suborganization = addr.find('.//' + url + 'suborganization')
            if suborganization is not None:
                if suborganization.text is not None:
                    r_addr.sub_organization[addr_ind] = suborganization.text.encode('utf-8')
            r_addr.city[addr_ind] = ''
            city = addr.find('.//' + url + 'city')
            if city is not None:
                if city.text is not None:
                    r_addr.city[addr_ind] = city.text.encode('utf-8')
            r_addr.country[addr_ind] = ''
            country = addr.find('.//' + url + 'country')
            if country is not None:
                if country.text is not None:
                    r_addr.country[addr_ind] = country.text.encode('utf-8')
            r_addr.zip_code[addr_ind] = ''
            addr_zip = addr.find('.//' + url + 'zip')
            if addr_zip is not None:
                if addr_zip.text is not None:
                    r_addr.zip_code[addr_ind] = addr_zip.text.encode('utf-8')
            if r_addr.addr_name[addr_ind] is not None:
                # inserting address records into the database, then retrieving and storing the address_id for future use in the authors insertion
                curs.execute('<query to upsert data in database>')
                r_addr.id[addr_ind] = curs.fetchone()[0]
        # parse titles for each publication
        r_title = ti.title()
        r_title.source_id = new_pub.source_id
        summary = REC.find('.//' + url + 'summary')
        if summary is not None:
            titles = summary.find('.//' + url + 'titles')
            if titles is not None:
                for title in titles.findall('.//' + url + 'title'):
                    if title is not None:
                        if title.text is not None:
                            r_title.title = title.text.encode('utf-8')
                            r_title.type = title.get('type')
                            # inserting titles into the database
                            curs.execute('<query to upsert data in database>')
        # parse authors for each publication
        r_author = auth.author()
        r_author.source_id = new_pub.source_id
        summary = REC.find('.//' + url + 'summary')
        names = summary.find(url + 'names')
        for name in names.findall(url + "name[@role='author']"):
            full_name = name.find(url + 'full_name')
            if full_name is not None:
                if full_name.text is not None:
                    r_author.full_name = full_name.text.encode('utf-8')
            wos_standard = name.find(url + 'wos_standard')
            if wos_standard is not None:
                if wos_standard.text is not None:
                    r_author.wos_standard = wos_standard.text.encode('utf-8')
            r_author.first_name = ''
            first_name = name.find(url + 'first_name')
            if first_name is not None:
                if first_name.text is not None:
                    r_author.first_name = first_name.text.encode('utf-8')
            last_name = name.find(url + 'last_name')
            if last_name is not None:
                if last_name.text is not None:
                    r_author.last_name = last_name.text.encode('utf-8')
            email_addr = name.find(url + 'email_addr')
            if email_addr is not None:
                if email_addr.text is not None:
                    r_author.email_addr = email_addr.text.encode('utf-8')
            r_author.seq_no = name.get('seq_no')
            r_author.dais_id = name.get('dais_id')
            if r_author.dais_id is None:
                r_author.dais_id = ''
            r_author.r_id = name.get('r_id')
            if r_author.r_id is None:
                r_author.r_id = ''
            addr_seqs = name.get('addr_no')
            r_author.address_id = ''
            r_author.addr_seq = ''
            if addr_seqs is not None:
                addr_no_str = addr_seqs.split(' ')
                for addr_seq in addr_no_str:
                    if addr_seq is not None:
                        addr_index = int(addr_seq)
                        if addr_index in addr_no_list:
                            r_author.address = r_addr.addr_name[addr_index]
                            r_author.address_id = r_addr.id[addr_index]
                            r_author.addr_seq = addr_seq
                            curs.execute('<query to upsert data in database>')
            else:
                r_author.address_id = 0
                r_author.addr_seq = 0
                # inserting records into the author tables of the database
                curs.execute('<query to upsert data in database>')
        # parse reference data for each publication
        REFERENCES = REC.find('.//' + url + 'references')
        for ref in REFERENCES.findall('.//' + url + 'reference'):
            r_reference = reference.reference()
            r_reference.source_id = new_pub.source_id
            r_reference.cited_source_uid = None
            cited_source_id = ref.find('.//' + url + 'uid')
            if cited_source_id is not None:
                if cited_source_id.text is not None:
                    r_reference.cited_source_uid = cited_source_id.text.encode('utf-8')
            cited_title = ref.find('.//' + url + 'citedTitle')
            if cited_title is not None:
                if cited_title.text is not None:
                    r_reference.cited_title = cited_title.text.encode('utf-8')
            r_reference.cited_work = ''
            cited_work = ref.find('.//' + url + 'citedWork')
            if cited_work is not None:
                if cited_work.text is not None:
                    r_reference.cited_work = cited_work.text.encode('utf-8')
            cited_author = ref.find('.//' + url + 'citedAuthor')
            if cited_author is not None:
                if cited_author.text is not None:
                    r_reference.cited_author = cited_author.text.encode('utf-8')[:299]
            cited_year = ref.find('.//' + url + 'year')
            if cited_year is not None:
                if cited_year.text is not None:
                    r_reference.cited_year = cited_year.text.encode('utf-8')
            cited_page = ref.find('.//' + url + 'page')
            if cited_page is not None:
                if cited_page.text is not None:
                    r_reference.cited_page = cited_page.text.encode('utf-8')
            r_reference.created_date = new_pub.created_date
            r_reference.last_modified_date = new_pub.last_modified_date
            if r_reference.cited_source_uid is not None:
                # inserting references into the database
                curs.execute('<query to upsert data in database>')
def instruct():
    """Prints the instructions to the screen"""
    print('''
    0 - exit the program
    1 - create a new vals file
    2 - update an existing vals file
    3 - re-encode the database using vals
    4 - split an xlsx file into vars and vals
    ''')

while True:
    instruct()
    ask = int(input('enter an action: '))
    if ask == 1:
        c = address(ask)
        if c[-3:] == 'csv':
            get_vals(c, read_csv, DataFrame)
        else:
            spss_to_vals(c)
    elif ask == 2:
        c, k = address(ask)
        update_vals(c, k)
    elif ask == 3:
        c, k = address(ask)
        recod_base(c, k)
    elif ask == 4:
        a = address(ask)
        disintegration(a)
    elif ask == 0:
        break
def entry(self, ex_id):
    var1 = 1
    while var1 == 1:
        print("1 ADDRESS CHANGE")
        print("2 OPEN NEW ACCOUNT")
        print("3 MONEY DEPOSIT")
        print("4 MONEY WITHDRAWAL")
        print("5 MONEY TRANSFER")
        print("6 PRINT STATEMENT")
        print("7 ACCOUNT CLOSURE")
        print("8 AVAIL LOAN")
        print("0 CUSTOMER LOGOUT")
        c = input()
        c = int(c)
        if c == 1:
            s2 = address()
            s2.add_change(ex_id)
        elif c == 3:
            s2 = money_dep()
            s2.money_deposit(ex_id)
        elif c == 4:
            s2 = money_wid()
            s2.money_withdrawl(ex_id)
        elif c == 6:
            pr = transPrint()
            pr.printi(ex_id)
        elif c == 5:
            s2 = money_transfer()
            print("enter the id to transfer money")
            cus = input()
            cus = int(cus)
            conn = pymysql.connect("localhost", "root", "", "bankM5", autocommit=True)
            cur = conn.cursor()
            sq3 = "SELECT * FROM cust_info"
            cur.execute(sq3)
            r = cur.rowcount
            if cus > r:
                print("wrong id, please input again:")
            else:
                cus = str(cus)
                s2.tranfer(ex_id, cus)
        elif c == 7:
            s2 = acc_close()
            s2.account_closure(ex_id)
            var1 = 2
        elif c == 0:
            print("logged out")
            var1 = 2
        elif c == 2:
            s1 = open_new_acc()
            s1.open(ex_id)
        elif c == 8:
            s1 = open_new_loan()
            s1.open(ex_id)
        elif c > 8:
            print("wrong choice, enter again")
def setUp(self):
    from address import address
    if self.adr_svc is None:
        self.adr_svc = address()
def lambda_handler(event, context):
    from address import address
    adr_svc = address()
    status_code = 200
    adr_svc.log.info(event)
    try:
        try:
            try:
                if event['context']['resource-path'] == "/address" \
                        and event['context']['http-method'] == "POST":
                    res = adr_svc.create_address(
                        json.dumps({
                            'address': event['body-json'],
                            'stage': event['context']['stage']
                        }).decode('utf-8')
                    )
                elif event['context']['resource-path'] == '/address' \
                        and event['context']['http-method'] == "GET" \
                        and event['params']['path'].get('addressId') is None:
                    res = adr_svc.list_addresses({
                        'address': None,
                        'stage': event['context']['stage']
                    })
                elif event['context']['resource-path'] == "/address/{addressId}" \
                        and event['context']['http-method'] == "PUT" \
                        and event['params']['path'].get('addressId') is not None:
                    res = adr_svc.update_address({
                        'address': event['body-json'],
                        'stage': event['context']['stage']
                    })
                elif event['context']['resource-path'] == '/address/{addressId}' \
                        and event['context']['http-method'] == "GET" \
                        and event['params']['path'].get('addressId') is not None:
                    res = adr_svc.get_address({
                        'addressId': event['params']['path']['addressId'],
                        'stage': event['context']['stage']
                    })
                elif event['context']['resource-path'] == '/address/{addressId}' \
                        and event['context']['http-method'] == "DELETE" \
                        and event['params']['path'].get('addressId') is not None:
                    res = adr_svc.remove_address({
                        'addressId': event['params']['path']['addressId'],
                        'stage': event['context']['stage']
                    })
                else:
                    status_code = 403
                    res = "Request not valid"
            except KeyError as e:
                status_code = 404
                res = {
                    'Error': e.message,
                    'statusCode': status_code
                }
                adr_svc.log.error(res)
        except urllib3.exceptions.ProtocolError as e:
            res = {
                'Error': e.message,
                'statusCode': e.status
            }
            adr_svc.log.error(res)
        output = res
    except botocore.exceptions.ClientError as e:
        output = json.dumps(
            {'status': e.response['ResponseMetadata']['HTTPStatusCode'],
             'body': 'Failed to update: %s' % e.response['Error']['Code']}
        )
        adr_svc.log.error(json.dumps(output))
    return output
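# Illustrative API-Gateway-style event for the handler above; the keys mirror
# the lookups in the code, the values are placeholders:
event = {
    'context': {'resource-path': '/address', 'http-method': 'GET', 'stage': 'dev'},
    'params': {'path': {}},
    'body-json': {},
}
print(lambda_handler(event, None))  # expected to route to list_addresses()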
def parse(self, xml_string, input_file_name, curs):
    url = '{http://clarivate.com/schema/wok5.27/public/FullRecord}'
    root = ET.fromstring(xml_string)
    for REC in root:
        # parse publications and create a publication object containing all the attributes of a publication
        new_pub = pub.publication()
        new_pub.source_id = REC.find(url + 'UID').text
        pub_info = REC.find('.//' + url + 'pub_info')
        new_pub.source_type = pub_info.get('pubtype')
        source_title = REC.find('.//' + url + "title[@type='source']")
        if source_title is not None:
            if source_title.text is not None:
                new_pub.source_title = source_title.text.encode('utf-8')
        # extracting values from properties of the pub_info tag in the XML
        new_pub.has_abstract = pub_info.get('has_abstract')
        new_pub.publication_year = pub_info.get('pubyear')
        new_pub.issue = pub_info.get('issue')
        new_pub.volume = pub_info.get('vol')
        new_pub.pubmonth = pub_info.get('pubmonth')
        new_pub.publication_date = pub_info.get('sortdate')
        new_pub.coverdate = pub_info.get('coverdate')
        page_info = pub_info.find(url + 'page')
        if page_info is not None:
            new_pub.begin_page = page_info.get('begin')
            new_pub.end_page = page_info.get('end')
        document_title = REC.find('.//' + url + "title[@type='item']")
        if document_title is not None:
            if document_title.text is not None:
                new_pub.document_title = document_title.text.encode('utf-8')
        document_type = REC.find('.//' + url + 'doctype')
        if document_type is not None:
            if document_type.text is not None:
                new_pub.document_type = document_type.text
        publisher_name = REC.find('.//' + url + "name[@role='publisher']")
        if publisher_name is not None:
            pub_name = publisher_name.find('.//' + url + 'full_name')
            if pub_name is not None:
                if pub_name.text is not None:
                    new_pub.publisher_name = pub_name.text.encode('utf-8')
        pub_address_no = REC.find('.//' + url + "address_spec[@addr_no='1']")
        if pub_address_no is not None:
            publisher_address = pub_address_no.find('.//' + url + 'full_address')
            if publisher_address is not None:
                if publisher_address.text is not None:
                    new_pub.publisher_address = publisher_address.text.encode('utf-8')
        languages = REC.find('.//' + url + 'languages')
        if languages is not None:
            language = languages.find('.//' + url + 'language')
            if language is not None:
                if language.text is not None:
                    new_pub.language = language.text.encode('utf-8')
        new_pub.edition = REC.find('.//' + url + 'edition').get('value')
        new_pub.source_filename = input_file_name
        new_pub.created_date = datetime.date.today()
        new_pub.last_modified_date = datetime.date.today()
        ## query to insert a publication record into the publications table in the database
        ## The query may be written into a separate file in the future, from where it is read in the form of a string, amended with values and executed, to make the code look better
        # TODO Query below is hard to read. I'd try a multi-line string with the proper SQL formatting.
        curs.execute(
            "INSERT INTO wos_publications(begin_page, created_date, document_title, document_type,edition, end_page,has_abstract,issue,"
            "language,last_modified_date,publication_date,publication_year,publisher_address,publisher_name,source_filename,source_id,"
            "source_title,source_type,volume)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id)"
            "DO UPDATE SET begin_page = excluded.begin_page, created_date = excluded.created_date,document_title ="
            " excluded.document_title, document_type = excluded.document_type, edition = excluded.edition,end_page ="
            "excluded.end_page, has_abstract = excluded.has_abstract, issue = excluded.issue,language = excluded.language,"
            "last_modified_date = excluded.last_modified_date,publication_date = excluded.publication_date, publication_year"
            "= excluded.publication_year,publisher_address = excluded.publisher_address, publisher_name = excluded.publisher_name,"
            "source_filename = excluded.source_filename, source_id = excluded.source_id, source_title = excluded.source_title,"
            "source_type = excluded.source_type, volume = excluded.volume, last_updated_time=current_timestamp;",
            (str(new_pub.begin_page), new_pub.created_date, str(new_pub.document_title),
             str(new_pub.document_type), str(new_pub.edition), str(new_pub.end_page),
             str(new_pub.has_abstract), str(new_pub.issue), str(new_pub.language),
             new_pub.last_modified_date, new_pub.publication_date, str(new_pub.publication_year),
             str(new_pub.publisher_address), str(new_pub.publisher_name),
             str(new_pub.source_filename), str(new_pub.source_id), str(new_pub.source_title),
             new_pub.source_type, str(new_pub.volume)))
        # parse grants in funding acknowledgements for each publication
        # New method of creating an object to store everything in the form of proper objects, which could be developed into classes having their own properties in future
        r_grant = grant.grant()
        r_grant.source_id = new_pub.source_id
        # r_grant.funding_ack = ''
        FUNDING_ACK = REC.find('.//' + url + 'fund_text')
        if FUNDING_ACK is not None:
            # if a funding acknowledgement exists, then extract the r_grant(s) data
            funding_ack_p = FUNDING_ACK.find('.//' + url + 'p')
            if funding_ack_p is not None:
                if funding_ack_p.text is not None:
                    r_grant.funding_ack = funding_ack_p.text.encode('utf-8')
        # looping through all the r_grant tags
        for l_grant in REC.findall('.//' + url + 'grant'):
            # r_grant.grant_agency = ''
            grant_agency = l_grant.find('.//' + url + 'grant_agency')
            if grant_agency is not None:
                if grant_agency.text is not None:
                    r_grant.grant_agency = grant_agency.text.encode('utf-8')
            grant_ids = l_grant.find('.//' + url + 'grant_ids')
            if grant_ids is not None:
                for grant_id in grant_ids.findall('.//' + url + 'grant_id'):
                    if grant_id is not None:
                        if grant_id.text is not None:
                            r_grant.grant_number = grant_id.text.encode('utf-8')
                    if r_grant.funding_ack is not None:
                        # insert the grant details in the grants table if there is any funding acknowledgement in the records
                        curs.execute(
                            "INSERT INTO wos_grants(source_id,grant_number,grant_organization,funding_ack,source_filename)VALUES"
                            "(%s,%s,%s,%s,%s) ON CONFLICT (source_id, grant_number, grant_organization) DO UPDATE SET source_id"
                            "= excluded.source_id, grant_number = excluded.grant_number,grant_organization ="
                            "excluded.grant_organization, funding_ack = excluded.funding_ack,source_filename ="
                            "excluded.source_filename, last_updated_time=current_timestamp;",
                            (str(r_grant.source_id), str(r_grant.grant_number),
                             str(r_grant.grant_agency), str(r_grant.funding_ack),
                             str(new_pub.source_filename)))
        # insert code to insert record in r_grant table
        # parse document object identifiers for each publication
        r_dois = dois.dois()
        r_dois.source_id = new_pub.source_id
        IDS = REC.find('.//' + url + 'identifiers')
        if IDS is not None:
            for identifier in IDS.findall('.//' + url + 'identifier'):
                id_value = identifier.get('value')
                if id_value is not None:
                    r_dois.doi = id_value.encode('utf-8')
                id_type = identifier.get('type')
                if id_type is not None:
                    r_dois.doi_type = id_type.encode('utf-8')
                if r_dois.doi is not None:
                    # inserting records into the wos_document_identifiers table
                    curs.execute(
                        "INSERT INTO wos_document_identifiers(source_id,document_id,document_id_type,source_filename)VALUES(%s,%s,%s,%s)"
                        "ON CONFLICT (source_id, document_id_type, document_id) DO UPDATE SET source_id = excluded.source_id,"
                        "document_id = excluded.document_id,document_id_type = excluded.document_id_type, source_filename ="
                        "excluded.source_filename, last_updated_time=current_timestamp;",
                        (str(r_dois.source_id), str(r_dois.doi), str(r_dois.doi_type),
                         str(new_pub.source_filename)))
        # parse keywords for each publication
        keywords = REC.find('.//' + url + 'keywords_plus')
        if keywords is not None:
            r_keyword = key_word.wos_keyword()
            r_keyword.source_id = new_pub.source_id
            for keyword in keywords.findall('.//' + url + 'keyword'):
                if keyword is not None:
                    if keyword.text is not None:
                        r_keyword.keyword = keyword.text.encode('utf-8')
                        # inserting records in wos_keywords
                        curs.execute(
                            "INSERT INTO wos_keywords(source_id,keyword,source_filename)VALUES(%s,%s,%s)ON CONFLICT"
                            "(source_id, keyword) DO UPDATE SET source_id = excluded.source_id, keyword = excluded.keyword,"
                            "source_filename = excluded.source_filename, last_updated_time=current_timestamp;",
                            (str(r_keyword.source_id), str(r_keyword.keyword),
                             str(new_pub.source_filename)))
        # parse abstract for each publication
        if new_pub.has_abstract == 'Y':
            abstracts = REC.find('.//' + url + 'abstracts')
            if abstracts is not None:
                r_abst = abst.abstract()
                r_abst.source_id = new_pub.source_id
                r_abstract_text = ''
                for abstract_text in abstracts.findall('.//' + url + 'p'):
                    if abstract_text is not None:
                        if abstract_text.text is not None:
                            if r_abstract_text != '' and abstract_text.text != '':
                                # separate paragraphs with a blank line
                                r_abstract_text = r_abstract_text + '\n\n'
                            r_abstract_text = r_abstract_text + abstract_text.text.encode('utf-8')
                # adding all the abstract paragraphs into one before writing it into the database
                r_abst.abstract_text = re.sub(r"^[\n]+", "", r_abstract_text)
                # writing the abstracts record into the database
                curs.execute(
                    "INSERT INTO wos_abstracts(source_id,abstract_text,source_filename)VALUES(%s,%s,%s) ON CONFLICT(source_id) DO UPDATE"
                    " SET source_id = excluded.source_id,abstract_text = excluded.abstract_text,source_filename = excluded.source_filename, last_updated_time=current_timestamp;",
                    (str(r_abst.source_id), str(r_abst.abstract_text),
                     str(new_pub.source_filename)))
        # parse addresses for each publication
        r_addr = add.address()
        addr_no_list = []
        addresses = REC.find('.//' + url + 'addresses')
        for addr in addresses.findall('.//' + url + 'address_spec'):
            addr_ind = addr.get('addr_no')
            if addr_ind is None:
                addr_ind = 0
            else:
                addr_ind = int(addr_ind)
            # Keep all addr_no for the following reference by authors
            addr_no_list.append(int(addr_ind))
            r_addr.source_id[addr_ind] = new_pub.source_id
            r_addr.addr_name[addr_ind] = ''
            addr_name = addr.find('.//' + url + 'full_address')
            if addr_name is not None:
                if addr_name.text is not None:
                    r_addr.addr_name[addr_ind] = addr_name.text.encode('utf-8')
            r_addr.organization[addr_ind] = ''
            organization = addr.find('.//' + url + "organization[@pref='Y']")
            if organization is not None:
                if organization.text is not None:
                    r_addr.organization[addr_ind] = organization.text.encode('utf-8')
            r_addr.sub_organization[addr_ind] = ''
            suborganization = addr.find('.//' + url + 'suborganization')
            if suborganization is not None:
                if suborganization.text is not None:
                    r_addr.sub_organization[addr_ind] = suborganization.text.encode('utf-8')
            r_addr.city[addr_ind] = ''
            city = addr.find('.//' + url + 'city')
            if city is not None:
                if city.text is not None:
                    r_addr.city[addr_ind] = city.text.encode('utf-8')
            r_addr.country[addr_ind] = ''
            country = addr.find('.//' + url + 'country')
            if country is not None:
                if country.text is not None:
                    r_addr.country[addr_ind] = country.text.encode('utf-8')
            r_addr.zip_code[addr_ind] = ''
            addr_zip = addr.find('.//' + url + 'zip')
            if addr_zip is not None:
                if addr_zip.text is not None:
                    r_addr.zip_code[addr_ind] = addr_zip.text.encode('utf-8')
            if r_addr.addr_name[addr_ind] is not None:
                # inserting address records into the database, then retrieving and storing the address_id for future use in the authors insertion
                curs.execute(
                    "INSERT INTO wos_addresses(source_id,address_name,organization,sub_organization,city,country,zip_code,source_filename)"
                    "VALUES(%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT (source_id, address_name) DO UPDATE SET source_id = excluded.source_id,"
                    "address_name = excluded.address_name,organization = excluded.organization, sub_organization = excluded.sub_organization,"
                    "city = excluded.city,country = excluded.country, zip_code = excluded.zip_code, source_filename = excluded.source_filename,"
                    " last_updated_time=current_timestamp RETURNING id;",
                    (str(r_addr.source_id[addr_ind]), str(r_addr.addr_name[addr_ind]),
                     str(r_addr.organization[addr_ind]), str(r_addr.sub_organization[addr_ind]),
                     str(r_addr.city[addr_ind]), str(r_addr.country[addr_ind]),
                     str(r_addr.zip_code[addr_ind]), str(new_pub.source_filename)))
                r_addr.id[addr_ind] = curs.fetchone()[0]
        # parse titles for each publication
        r_title = ti.title()
        r_title.source_id = new_pub.source_id
        summary = REC.find('.//' + url + 'summary')
        if summary is not None:
            titles = summary.find('.//' + url + 'titles')
            if titles is not None:
                for title in titles.findall('.//' + url + 'title'):
                    if title is not None:
                        if title.text is not None:
                            r_title.title = title.text.encode('utf-8')
                            r_title.type = title.get('type')
                            # inserting titles into the database
                            curs.execute(
                                "INSERT INTO wos_titles(source_id,title,type,source_filename)VALUES(%s,%s,%s,%s)ON CONFLICT (source_id, type)"
                                "DO UPDATE SET source_id = excluded.source_id, title = excluded.title, type = excluded.type,source_filename ="
                                "excluded.source_filename, last_updated_time=current_timestamp;",
                                (str(r_title.source_id), str(r_title.title), str(r_title.type),
                                 str(new_pub.source_filename)))
        # parse subjects for each publication
        r_subjects = sb.subjects()
        r_subjects.source_id = new_pub.source_id
        subjects = REC.find('.//' + url + 'subjects')
        if subjects is not None:
            for subject in subjects.findall('.//' + url + 'subject'):
                if subject is not None:
                    if subject.text is not None:
                        r_subjects.subject = subject.text.encode('utf-8')
                        r_subjects.subject_classification_type = subject.get('ascatype')
                        # inserting subjects into the database
                        curs.execute(
                            "INSERT INTO wos_publication_subjects(source_id,subject_classification_type,subject,source_filename)VALUES(%s,%s,%s,%s)ON CONFLICT (source_id,subject_classification_type,subject)"
                            "DO UPDATE
SET source_id = excluded.source_id, subject_classification_type = excluded.subject_classification_type, subject = excluded.subject,source_filename ="\ "excluded.source_filename, last_updated_time=current_timestamp;", (str(r_subjects.source_id), str(r_subjects.subject_classification_type), str(r_subjects.subject), str(new_pub.source_filename))) # parse authors for each publication r_author = auth.author() r_author.source_id = new_pub.source_id summary = REC.find('.//' + url + 'summary') names = summary.find(url + 'names') for name in names.findall(url + "name[@role='author']"): full_name = name.find(url + 'full_name') if full_name is not None: if full_name.text is not None: r_author.full_name = full_name.text.encode('utf-8') wos_standard = name.find(url + 'wos_standard') if wos_standard is not None: if wos_standard.text is not None: r_author.wos_standard = wos_standard.text.encode('utf-8') r_author.first_name = '' first_name = name.find(url + 'first_name') if first_name is not None: if first_name.text is not None: r_author.first_name = first_name.text.encode('utf-8') last_name = name.find(url + 'last_name') if last_name is not None: if last_name.text is not None: r_author.last_name = last_name.text.encode('utf-8') email_addr = name.find(url + 'email_addr') if email_addr is not None: if email_addr.text is not None: r_author.email_addr = email_addr.text.encode('utf-8') r_author.seq_no = name.get('seq_no') r_author.dais_id = name.get('dais_id') if (r_author.dais_id == None): r_author.dais_id = '' r_author.r_id = name.get('r_id') if (r_author.r_id == None): r_author.r_id = '' addr_seqs = name.get('addr_no') r_author.address_id = '' r_author.addr_seq = '' if addr_seqs is not None: addr_no_str = addr_seqs.split(' ') for addr_seq in addr_no_str: if addr_seq is not None: addr_index = int(addr_seq) if addr_index in addr_no_list: r_author.address = r_addr.addr_name[addr_index] r_author.address_id = r_addr.id[addr_index] r_author.addr_seq = addr_seq curs.execute( "INSERT INTO wos_authors(source_id,full_name,last_name,first_name,seq_no,address_seq,address,email_address,address_id,"\ "dais_id,r_id,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) ON CONFLICT (source_id, seq_no, address_id)"\ "DO UPDATE SET source_id = excluded.source_id, full_name = excluded.full_name,last_name = excluded.last_name, first_name"\ "= excluded.first_name, seq_no = excluded.seq_no,address_seq = excluded.address_seq, address = excluded.address,"\ "email_address = excluded.email_address,address_id = excluded.address_id, dais_id = excluded.dais_id, r_id ="\ "excluded.r_id,source_filename = excluded.source_filename, last_updated_time=current_timestamp;", (str(r_author.source_id), str(r_author.full_name), str(r_author.last_name), str(r_author.first_name), str(r_author.seq_no), str(r_author.addr_seq), str(r_author.address), str(r_author.email_addr), str(r_author.address_id), str(r_author.dais_id), str(r_author.r_id), str(new_pub.source_filename))) else: r_author.address_id = 0 r_author.addr_seq = 0 # inserting records into author tables of database curs.execute( "INSERT INTO wos_authors(source_id,full_name,last_name,first_name,seq_no,email_address,dais_id,r_id,source_filename)"\ "VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT (source_id, seq_no, address_id) DO UPDATE SET source_id ="\ "excluded.source_id, full_name = excluded.full_name,last_name = excluded.last_name, first_name ="\ "excluded.first_name, seq_no = excluded.seq_no,address_seq = excluded.address_seq, address = excluded.address,"\ "email_address = 
excluded.email_address,address_id = excluded.address_id, dais_id = excluded.dais_id, r_id ="\ "excluded.r_id,source_filename = excluded.source_filename, last_updated_time=current_timestamp;", ( str(r_author.source_id), str(r_author.full_name), str(r_author.last_name), str(r_author.first_name), str(r_author.seq_no), str(r_author.email_addr), str(r_author.dais_id), str(r_author.r_id), str(new_pub.source_filename))) # parse reference data for each publication REFERENCES = REC.find('.//' + url + 'references') for ref in REFERENCES.findall('.//' + url + 'reference'): try: r_reference = reference.reference() r_reference.source_id = new_pub.source_id r_reference.cited_source_uid = None cited_source_id = ref.find('.//' + url + 'uid') if cited_source_id is not None: if cited_source_id.text is not None: r_reference.cited_source_uid = cited_source_id.text. \ encode('utf-8') cited_title = ref.find('.//' + url + 'citedTitle') if cited_title is not None: if cited_title.text is not None: r_reference.cited_title = cited_title.text.encode('utf-8') r_reference.cited_work = '' cited_work = ref.find('.//' + url + 'citedWork') if cited_work is not None: if cited_work.text is not None: r_reference.cited_work = cited_work.text.encode('utf-8') cited_author = ref.find('.//' + url + 'citedAuthor') if cited_author is not None: if cited_author.text is not None: r_reference.cited_author = cited_author.text.encode('utf-8')[:299] cited_year = ref.find('.//' + url + 'year') if cited_year is not None: if cited_year.text is not None: r_reference.cited_year = cited_year.text.encode('utf-8') cited_page = ref.find('.//' + url + 'page') if cited_page is not None: if cited_page.text is not None: r_reference.cited_page = cited_page.text.encode('utf-8') r_reference.created_date = new_pub.created_date r_reference.last_modified_date = new_pub.last_modified_date if r_reference.cited_source_uid is not None: # inserting references into database curs.execute( "INSERT INTO wos_references(source_id,cited_source_uid,cited_title,cited_work,cited_author,cited_year,cited_page,"\ "created_date,last_modified_date,source_filename)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)ON CONFLICT ON CONSTRAINT"\ " wos_references_pk DO UPDATE SET source_id = excluded.source_id, cited_source_uid = excluded.cited_source_uid,"\ "cited_title = excluded.cited_title, cited_work = excluded.cited_work, cited_author = excluded.cited_author,"\ "cited_year = excluded.cited_year, cited_page = excluded.cited_page, created_date = excluded.created_date,"\ "last_modified_date = excluded.last_modified_date, source_filename = excluded.source_filename, last_updated_time=current_timestamp;", (str(r_reference.source_id), str(r_reference.cited_source_uid), str(r_reference.cited_title), str(r_reference.cited_work), str(r_reference.cited_author), str(r_reference.cited_year), str(r_reference.cited_page), str(r_reference.created_date), str(r_reference.last_modified_date), str(new_pub.source_filename))) except Exception: print "ERROR occurred for the following reference record:\n", r_reference raise
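# A minimal driver sketch for the parse() method above, not part of the
# original source: it assumes the method lives on a class named here as
# WosXmlParser (a hypothetical name, since the real class is not shown),
# that psycopg2 is the database driver behind curs, and that the DSN string
# and file path are placeholders.
import psycopg2


def load_wos_file(input_file_name):
    # read one WoS FullRecord XML export and upsert all of its records
    with open(input_file_name) as xml_file:
        xml_string = xml_file.read()
    conn = psycopg2.connect("dbname=wos user=wos")  # placeholder DSN
    try:
        curs = conn.cursor()
        WosXmlParser().parse(xml_string, input_file_name, curs)
        conn.commit()  # one commit per file keeps the upserts atomic
    finally:
        conn.close()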
#! /usr/bin/python3
from name_module import print_name
import address
import sys

print(dir(sys))
print_name('Himanshu')
address.address("210 Hawkes bay, California")
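# The script above depends on two local modules that are not shown. A minimal
# sketch of what they might contain, so the example can run standalone; the
# real name_module and address implementations may well differ.

# name_module.py
def print_name(name):
    # print a simple greeting for the given name
    print("Hello, {}!".format(name))

# address.py
class address:
    def __init__(self, full_address):
        # store and echo the address passed in
        self.full_address = full_address
        print("Address: {}".format(full_address))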