def get_search_field():
    # Show the list of Web of Science search-field options and return the
    # user's pick, mapped through search_field().
    # Relies on globals: opt_tags (list of <option> tags), getValue,
    # search_field.  Uses Python 2 raw_input.
    print("\n=============================\n")
    print("Please select one of the options from below: (Example: To select 'Author', press 3 and Enter)")
    # Menu is 1-based for the user, hence cnt + 1.
    for cnt, opt in enumerate(opt_tags):
        print("%s : %s" % (cnt + 1, getValue(opt.contents)))
    print("\n=============================\n")
    choice = raw_input("Your option: ")
    #choice = 7
    field = search_field(choice)
    return field
def get_search_field():
    """Prompt the user to pick a search field and return the mapped value.

    Displays every entry of the global ``opt_tags`` option list (1-based
    menu), reads the selection with ``raw_input`` and translates it via
    the global ``search_field`` helper.
    """
    print("\n=============================\n")
    print(
        "Please select one of the options from below: (Example: To select 'Author', press 3 and Enter)"
    )
    for number, option in enumerate(opt_tags, 1):
        print("%s : %s" % (number, getValue(option.contents)))
    print("\n=============================\n")
    return search_field(raw_input("Your option: "))
def wos_login(username, password):
    # Log in to Web of Science through the global mechanize browser `br`.
    # On success the post-login URL (hidden input 'currUrl') is written to
    # the global `login` file handle.
    # A TypeError here means soup.find(...) returned None and was
    # subscripted -- taken as "login failed", typically because another
    # session is already open; the error page offers a link to close it.
    try:
        url = "http://login.webofknowledge.com"
        response = br.open(url)
        br.form = list(br.forms())[0]
        # Fill the first form: every text control gets the username, every
        # password control the password; submit buttons are disabled so
        # br.submit() uses the form's default action.
        for control in br.form.controls:
            if control.type == "text":
                control.value = username
            elif control.type == "password":
                control.value = password
            elif control.type == "submit":
                control.disabled = True
        br.submit()
        result = br.response().read()
        soup = BeautifulSoup(result, 'html.parser')
        next_url = soup.find('input', id='currUrl')['value']
        print(next_url, file=login)
        # Logout
        #br.follow_link(text='Log Out')
    except TypeError:
        # `soup` was bound in the try block above before the failing
        # subscript, so the parsed error page is still available here.
        get_error = soup.find(
            'td', {'class': "NEWwokErrorContainer SignInLeftColumn"})
        error = getValue(get_error.find('h2').contents)
        print("Error: %s" % (error))
        # `next` shadows the builtin; asks whether to close the other session.
        next = raw_input("%s (y/n)? : " % getValue(get_error.p.a.contents))
        #next = 'y'
        if next not in ['y', 'Y']:
            # NOTE(review): prints "Exiting" but does not terminate; the
            # caller simply continues after this function returns.
            print("Exiting")
        else:
            print("Closing other login session..")
            next_url = get_error.p.a['href']
            print(next_url, file=login)
def wos_login(username, password):
    """Sign in to Web of Science via the global mechanize browser ``br``.

    On success the post-login URL is written to the global ``login`` file
    handle.  A ``TypeError`` while reading the hidden ``currUrl`` input is
    treated as a failed login (usually another session is already open);
    the user may then choose to close the other session.
    """
    try:
        br.open("http://login.webofknowledge.com")
        br.form = list(br.forms())[0]
        # Populate credentials; disable submit controls so the form's
        # default action is used on br.submit().
        for ctl in br.form.controls:
            if ctl.type == "password":
                ctl.value = password
            elif ctl.type == "text":
                ctl.value = username
            elif ctl.type == "submit":
                ctl.disabled = True
        br.submit()
        soup = BeautifulSoup(br.response().read(), 'html.parser')
        print(soup.find('input', id='currUrl')['value'], file=login)
        # Logout
        #br.follow_link(text='Log Out')
    except TypeError:
        # The parsed error page is still in `soup` from the try block.
        error_cell = soup.find(
            'td', {'class': "NEWwokErrorContainer SignInLeftColumn"})
        print("Error: %s" % (getValue(error_cell.find('h2').contents)))
        answer = raw_input("%s (y/n)? : " % getValue(error_cell.p.a.contents))
        if answer in ['y', 'Y']:
            print("Closing other login session..")
            print(error_cell.p.a['href'], file=login)
        else:
            print("Exiting")
model.addConstr(mu[0, j] == nodeNum * X[0, j], name='Const 9.8' + str(j)) # add constraints 9.9 for i in range(0, nodeNum + 1): for j in range(0, nodeNum + 1): if (i != j): model.addConstr(mu[i, j] <= nodeNum * X[i, j], name='Const 9.9' + str(i) + 'to' + str(j)) # add constraints 9.10 for j in range(1, nodeNum + 1): lhs = LinExpr(0) for i in range(nodeNum + 1): if (i != j): lhs.addTerms(1, mu[i, j]) lhs.addTerms(-1, mu[j, i]) model.addConstr(lhs == 1, name='Const 9.10 ' + str(j)) model.setParam('TimeLimit', 1000) model.write('model.lp') model.optimize() x_value = getValue(X, nodeNum) route = getRoute(x_value) print('optimal route:', route) print('optimal value', model.objVal) endtime = time.time() print('time', endtime - starttime) print('Gap', model.MIPGap)
def extract_data(data_url):
    # Scrape one Web of Science full-record page and return its metadata
    # as a single "|"-separated string of 26 fields.
    # Field map (as assigned below): 2=authors, 3=author short names,
    # 4/5=reprint address, 6/7=organisations/addresses, 8=e-mails,
    # 9=volume, 10=issue, 11=pages, 12=DOI, 13=published date,
    # 14=abstract, 15=keywords plus, 16=publisher, 17=research domain,
    # 18=categories, 19=doc type, 20=language, 21=accession number,
    # 22=ISSN, 23=eISSN, 24=IDS number, 25=cited refs count,
    # 26=times cited.  field1 is never assigned (always empty).
    # Relies on globals: getValue, ex_log; Python 2 urllib.
    field1=""
    field2=""
    field3=""
    field4=""
    field5=""
    field6=""
    field7=""
    field8=""
    field9=""
    field10=""
    field11=""
    field12=""
    field13=""
    field14=""
    field15=""
    field16=""
    field17=""
    field18=""
    field19=""
    field20=""
    field21=""
    field22=""
    field23=""
    field24=""
    field25=""
    field26=""
    f = urllib.urlopen(data_url)
    rsp = f.read()
    print("\n====================================\n", file=ex_log)
    print(rsp, file=ex_log)
    #### Beautifulsoup
    soup = BeautifulSoup(rsp, "html.parser")
    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()  # rip it out
    #Code to extract Abstract, Keywords, Publisher, Categories/Classification, Document Information, DocumentType, Language
    div_tags = soup.find_all('div', {'class' : 'block-record-info'})
    for d in div_tags:
        print(d, file=ex_log)
        try:
            title = getValue(d.div.contents)
            if title == "Abstract":
                field14=getValue(d.p.contents)
            elif title == "Keywords":
                a_tags = d.p.find_all('a', {'title' : "Find more records by this keywords plus"})
                for a in a_tags:
                    field15=field15 + ", " + getValue(a.contents)
                # strip the leading ", " left by the accumulation above
                field15=field15[2:]
            elif title == "Publisher":
                field16=getValue(d.p.value.contents)
            elif title == "Categories / Classification":
                field18=d.p.get_text().split(':')[1]
            elif title == "Document Information":
                for doc in d.find_all('p'):
                    span = getValue(doc.span.contents)
                    value = doc.get_text().split(':')[1]
                    if span == "Document Type:":
                        field19=value
                    elif span == "Language:":
                        field20=value
        except AttributeError:
            # Missing sub-elements on this block: skip it, keep scraping.
            pass
    #Code to extract Author, Volume, Issue, Pages, DOI, Published Date, ISSN, Research Domain, Accession Number, eISSN, IDS Number
    p_tags = soup.find_all('p', {'class' : 'FR_field'})
    #p_tags = p_tags[1:]
    for p in p_tags:
        try:
            # Field label is the text before the first ':' in the span.
            span = getValue(p.span.contents)
            span = span.split(':')[0]
            if span == "By":
                a_tags = p.find_all('a', {'title' : "Find more records by this author"})
                for a in a_tags:
                    field2=field2 + "; " + getValue(a.contents)
                    try:
                        # Short name is the text in parentheses after the link.
                        field3=field3 + "; " + a.next_sibling.split('(')[1].split(')')[0].strip()
                    except IndexError:
                        pass
                # strip the leading "; " prefix from the accumulators
                field2=field2[2:]
                field3=field3[2:]
            elif span == "Reprint Address":
                field4=p.span.next_sibling.split('(')[0].strip()
                field5=p.find('td', {'class': 'fr_address_row2'}).contents[0]
            elif span == "Addresses":
                tr_tags = p.find_all('tr')
                for tr in tr_tags:
                    try:
                        field6=field6 + "; " + getValue(tr.preferred_org.contents)
                    except AttributeError:
                        pass
                field6=field6[2:]
                a_tags = p.find_all('a', {'name': re.compile("addressWOS:*")})
                for a in a_tags:
                    field7=field7 + "; " + getValue(a.contents)
                field7=field7[2:]
            elif span == "E-mail Addresses":
                a_tags = p.find_all('a')
                for a in a_tags:
                    field8=field8 + "; " + getValue(a.contents)
                field8=field8[2:]
            elif span == "Volume":
                field9=getValue(p.value.contents)
            elif span == "Issue":
                field10=getValue(p.value.contents)
            elif span == "Pages":
                field11=getValue(p.value.contents)
            elif span == "DOI":
                field12=getValue(p.value.contents)
            elif span == "Published":
                field13=getValue(p.value.contents)
            elif span == "ISSN":
                field22=getValue(p.value.contents)
            elif span == "Research Domain ":
                field17=getValue(p.value.contents)
            elif span == "Accession Number":
                field21=getValue(p.value.contents)
            elif span == "eISSN":
                field23=getValue(p.value.contents)
            elif span == "IDS Number":
                field24=getValue(p.value.contents)
            elif span == "Cited References in Web of Science Core Collection":
                field25=getValue(p.a.b.contents)
            elif span == "Times Cited in Web of Science Core Collection":
                field26=getValue(p.b.contents)
        except AttributeError:
            # Field not present in this record: leave its default "".
            pass
    record = "|".join([field1, field2, field3, field4, field5, field6, field7, field8, field9, field10, field11, field12, field13, field14, field15, field16, field17, field18, field19, field20, field21, field22, field23, field24, field25, field26])
    f.close()
    return record
def open_url(src_DOI, src_AN, ref_url):
    # Open a cited-references listing page for the source record identified
    # by src_DOI/src_AN, walk all result pages, and log every cited item as
    # "page|DOI|AN|title|link" lines to the global `result_log` (page URLs
    # go to `page_log`).  Uses the global mechanize browser `br`.
    # An AttributeError anywhere (e.g. missing result-count span) is taken
    # to mean no cited article is accessible under the subscription.
    #f = urllib.urlopen(ref_url)
    f = br.open(ref_url)
    rsp = f.read()
    print(rsp)
    #### Beautifulsoup
    soup = BeautifulSoup(rsp, "html.parser")
    try:
        print("In try")
        result = soup.find('span', id="trueFinalResultCount")
        total_count = getValue(result.contents)
        print("Number of search results: %s" % (total_count))
        search_count = soup.find('span', id='pageCount.top')
        page_count = getValue(search_count.contents)
        print("Number of search pages: %s" % (page_count))
        if int(page_count) > 1:
            print("page count > 1")
            # The "next page" href ends with a page number; strip the last
            # character and append our own counter to address each page.
            page_links = soup.find('a', {'class' : 'paginationNext', 'title' : 'Next Page'})
            page_link = page_links['href']
            page_link = page_link[:len(page_link)-1]
            i=1
            #while i <= 2:
            while i <= int(page_count):
                print(str(i) + "|" + page_link + str(i))
                print(str(i) + "|" + page_link + str(i), file=page_log)
                new_url = page_link + str(i)
                p = br.open(new_url)
                page_rsp = p.read()
                soup = BeautifulSoup(page_rsp, "html.parser")
                div_tags = soup.find_all('div', {'class' : "search-results-item"})
                print(div_tags)
                for div in div_tags:
                    try:
                        ref = div.find('a', {'class' : "smallV110"})['href']
                        print(ref)
                        link = "http://apps.webofknowledge.com" + ref
                    except TypeError:
                        # find() returned None -> no record link available
                        link = "No link"
                    try:
                        span = div.find('span', {'class' : "reference-title"})
                        title = getValue(span.value.contents)
                    except AttributeError:
                        title = "Title: [not available]"
                    print(link)
                    print(title)
                    print(str(i) + "|" + src_DOI + "|" + src_AN + "|" + title + "|" + link)
                    print(str(i) + "|" + src_DOI + "|" + src_AN + "|" + title + "|" + link, file=result_log)
                # Step the browser history back before fetching the next page.
                br.back()
                i+=1
        elif int(page_count) == 1:
            print("page count == 1")
            # Single page: the initially fetched soup already holds the items.
            print("1|" + ref_url)
            print("1|" + ref_url, file=page_log)
            div_tags = soup.find_all('div', {'class' : "search-results-item"})
            print(div_tags)
            for div in div_tags:
                try:
                    ref = div.find('a', {'class' : "smallV110"})['href']
                    print(ref)
                    link = "http://apps.webofknowledge.com" + ref
                except TypeError:
                    link = "No link"
                try:
                    span = div.find('span', {'class' : "reference-title"})
                    title = getValue(span.value.contents)
                except AttributeError:
                    title = "Title: [not available]"
                print(link)
                print(title)
                print("1|" + src_DOI + "|" + src_AN + "|" + title + "|" + link)
                print("1|" + src_DOI + "|" + src_AN + "|" + title + "|" + link, file=result_log)
    except AttributeError:
        print("1|" + "None of the Cited Articles are in your subscription", file=page_log)
        print("1|" + src_DOI + "|" + src_AN + "||" + "None of the Cited Articles are in your subscription", file=result_log)
    f.close()
# For every line of the result file ("...|url|..."), fetch the record page
# and pull DOI, accession number and citation counts into the global `log`.
# Python 2: `file(...)` is the builtin file constructor.
for line in file(result_file, "r"):
    DOI = ""
    AN = ""
    cited_ref_cnt = 0
    citing_ref_cnt = 0
    url = line.split('|')[1]
    f = urllib.urlopen(url)
    rsp = f.read()
    print(rsp, file=log)
    #### Beautifulsoup
    soup = BeautifulSoup(rsp, "html.parser")
    p_tags = soup.find_all('p', {'class': 'FR_field'})
    #p_tags = p_tags[1:]
    for p in p_tags:
        try:
            # Field label is the text before the first ':'.
            span = getValue(p.span.contents)
            span = span.split(':')[0]
            if span == "DOI":
                DOI = getValue(p.value.contents)
            elif span == "Accession Number":
                AN = getValue(p.value.contents)
            elif span == "Cited References in Web of Science Core Collection":
                cited_ref_cnt = getValue(p.b.contents)
            elif span == "Times Cited in Web of Science Core Collection":
                citing_ref_cnt = getValue(p.b.contents)
        except AttributeError:
            # Field absent on this record; keep the default.
            pass
    print("DOI:%s AN:%s CNT1:%s CNT2:%s" % (DOI, AN, cited_ref_cnt, citing_ref_cnt), file=log)
    # NOTE(review): `div` appears to be consumed after this excerpt.
    div = soup.find('div', {'class': 'block-text-content'})
def extract_data(data_url):
    # Scrape one Web of Science full-record page and return its metadata
    # as a single "|"-separated string of 26 fields (duplicate of the
    # other extract_data in this file, logging to `log` instead of
    # `ex_log`).  field1 is never assigned and stays empty.
    # Relies on globals: getValue, log; Python 2 urllib.
    field1 = ""
    field2 = ""
    field3 = ""
    field4 = ""
    field5 = ""
    field6 = ""
    field7 = ""
    field8 = ""
    field9 = ""
    field10 = ""
    field11 = ""
    field12 = ""
    field13 = ""
    field14 = ""
    field15 = ""
    field16 = ""
    field17 = ""
    field18 = ""
    field19 = ""
    field20 = ""
    field21 = ""
    field22 = ""
    field23 = ""
    field24 = ""
    field25 = ""
    field26 = ""
    f = urllib.urlopen(data_url)
    rsp = f.read()
    print("\n====================================\n", file=log)
    print(rsp, file=log)
    #### Beautifulsoup
    soup = BeautifulSoup(rsp, "html.parser")
    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()  # rip it out
    #Code to extract Abstract, Keywords, Publisher, Categories/Classification, Document Information, DocumentType, Language
    div_tags = soup.find_all('div', {'class': 'block-record-info'})
    for d in div_tags:
        print(d, file=log)
        try:
            title = getValue(d.div.contents)
            if title == "Abstract":
                field14 = getValue(d.p.contents)
            elif title == "Keywords":
                a_tags = d.p.find_all(
                    'a', {'title': "Find more records by this keywords plus"})
                for a in a_tags:
                    field15 = field15 + ", " + getValue(a.contents)
                # strip the leading ", " left by the accumulation above
                field15 = field15[2:]
            elif title == "Publisher":
                field16 = getValue(d.p.value.contents)
            elif title == "Categories / Classification":
                field18 = d.p.get_text().split(':')[1]
            elif title == "Document Information":
                for doc in d.find_all('p'):
                    span = getValue(doc.span.contents)
                    value = doc.get_text().split(':')[1]
                    if span == "Document Type:":
                        field19 = value
                    elif span == "Language:":
                        field20 = value
        except AttributeError:
            # Missing sub-elements on this block: skip it, keep scraping.
            pass
    #Code to extract Author, Volume, Issue, Pages, DOI, Published Date, ISSN, Research Domain, Accession Number, eISSN, IDS Number
    p_tags = soup.find_all('p', {'class': 'FR_field'})
    #p_tags = p_tags[1:]
    for p in p_tags:
        try:
            # Field label is the text before the first ':' in the span.
            span = getValue(p.span.contents)
            span = span.split(':')[0]
            if span == "By":
                a_tags = p.find_all(
                    'a', {'title': "Find more records by this author"})
                for a in a_tags:
                    field2 = field2 + "; " + getValue(a.contents)
                    try:
                        # Short name is the text in parentheses after the link.
                        field3 = field3 + "; " + a.next_sibling.split(
                            '(')[1].split(')')[0].strip()
                    except IndexError:
                        pass
                # strip the leading "; " prefix from the accumulators
                field2 = field2[2:]
                field3 = field3[2:]
            elif span == "Reprint Address":
                field4 = p.span.next_sibling.split('(')[0].strip()
                field5 = p.find('td', {'class': 'fr_address_row2'}).contents[0]
            elif span == "Addresses":
                tr_tags = p.find_all('tr')
                for tr in tr_tags:
                    try:
                        field6 = field6 + "; " + getValue(
                            tr.preferred_org.contents)
                    except AttributeError:
                        pass
                field6 = field6[2:]
                a_tags = p.find_all('a', {'name': re.compile("addressWOS:*")})
                for a in a_tags:
                    field7 = field7 + "; " + getValue(a.contents)
                field7 = field7[2:]
            elif span == "E-mail Addresses":
                a_tags = p.find_all('a')
                for a in a_tags:
                    field8 = field8 + "; " + getValue(a.contents)
                field8 = field8[2:]
            elif span == "Volume":
                field9 = getValue(p.value.contents)
            elif span == "Issue":
                field10 = getValue(p.value.contents)
            elif span == "Pages":
                field11 = getValue(p.value.contents)
            elif span == "DOI":
                field12 = getValue(p.value.contents)
            elif span == "Published":
                field13 = getValue(p.value.contents)
            elif span == "ISSN":
                field22 = getValue(p.value.contents)
            elif span == "Research Domain ":
                field17 = getValue(p.value.contents)
            elif span == "Accession Number":
                field21 = getValue(p.value.contents)
            elif span == "eISSN":
                field23 = getValue(p.value.contents)
            elif span == "IDS Number":
                field24 = getValue(p.value.contents)
            elif span == "Cited References in Web of Science Core Collection":
                field25 = getValue(p.a.b.contents)
            elif span == "Times Cited in Web of Science Core Collection":
                field26 = getValue(p.b.contents)
        except AttributeError:
            # Field not present in this record: leave its default "".
            pass
    record = "|".join([
        field1, field2, field3, field4, field5, field6, field7, field8,
        field9, field10, field11, field12, field13, field14, field15,
        field16, field17, field18, field19, field20, field21, field22,
        field23, field24, field25, field26
    ])
    f.close()
    return record
#add = raw_input("Want to add another search field? (Y/N): ") #add = 'n' #if add not in ['Y', 'y']: # break br.submit() print("\n=============================\n") result = br.response().read() soup = BeautifulSoup(result, 'html.parser') ############ print(result, file=ex_log) ex_log.close() br.back() result = soup.find('span', id="trueFinalResultCount") total_count = getValue(result.contents) print("Number of search results: %s" % (total_count)) search_count = soup.find('span', id='pageCount.top') page_count = getValue(search_count.contents) print("Number of search pages: %s" % (page_count)) page_links = soup.find('a', {'class' : 'paginationNext', 'title' : 'Next Page'}) page_link = page_links['href'] page_link = page_link[:len(page_link)-1] i=1 page_file ="/home/usha/python/log/page.txt" page_log = open(page_file, "w") result_file = "/home/usha/python/log/result.txt" result_log = open(result_file, "w") #while i <= 2:
def set_Timespan():
    """Interactively set the search timespan on the current WoS form.

    Offers two modes: (1) a predefined "Range Selection" drop-down, or
    (2) an explicit "Year Range" via start/end-year drop-downs.  Operates
    on the globals ``br`` (mechanize browser with the search form already
    selected) and ``soup`` (parsed search page); ``get_Range`` maps a menu
    choice to the name of a range-control item.  Loops until a valid
    option has been applied.
    """
    while True:
        print("\n=============================\n")
        print("1. Range Selection")
        print("2. Year Range")
        radio = raw_input("Select an option for timespan: ")
        div = soup.find('div', id="timespan")
        if int(radio) == 1:
            br.form.set_value(['Range Selection'], name="period")
            # Select range from drop down list
            ts_tags = div.find('input', {'name': 'period'}).find_all('option')
            while True:
                print("\n=============================\n")
                for cnt, ts in enumerate(ts_tags):
                    print("%s: %s" % (cnt + 1, getValue(ts.contents)))
                print("\n=============================\n")
                choice = raw_input("Please select one of the above options: ")
                if int(choice) in [1, 2, 3, 4, 5, 6]:
                    break
                else:
                    print("Invalid choice, select again")
            timespan = get_Range(choice)
            control_select = br.form.find_control(type='select', name='range')
            # loop through drop down list items of timespan
            for each in control_select.items:
                if each.name == timespan:
                    each.selected = True
                    break
            break
        elif int(radio) == 2:
            br.form.set_value(['Year Range'], name="period")
            # Valid year bounds come from the page's own <option> lists.
            from_sel = div.find('select', {
                'name': 'startYear'
            }).find_all('option')
            from_start = from_sel[0]['value']
            from_end = from_sel[len(from_sel) - 1]['value']
            to_sel = div.find('select', {'name': 'endYear'}).find_all('option')
            to_start = to_sel[0]['value']
            to_end = to_sel[len(to_sel) - 1]['value']
            startYear = int(
                raw_input("Please select the Start year between %s and %s: " %
                          (from_start, from_end)))
            endYear = int(
                raw_input("Please select the End year between %s and %s: " %
                          (to_start, to_end)))
            #startYear=2010
            #endYear=2016
            control_start = br.form.find_control(type='select', name='startYear')
            # BUG FIX: mechanize item names are strings, but startYear and
            # endYear were converted with int(), so `each.name == startYear`
            # never matched and the year drop-downs were silently left
            # unset.  Compare as strings instead.
            for each in control_start.items:
                if each.name == str(startYear):
                    each.selected = True
                    break
            control_end = br.form.find_control(type='select', name='endYear')
            # loop through drop down list of endYear
            for each in control_end.items:
                if each.name == str(endYear):
                    each.selected = True
                    break
            break
        else:
            print("Invalid option selected, choose again")
#add = raw_input("Want to add another search field? (Y/N): ") #add = 'n' #if add not in ['Y', 'y']: # break br.submit() print("\n=============================\n") result = br.response().read() soup = BeautifulSoup(result, 'html.parser') ############ print(result, file=ex_log) ex_log.close() br.back() result = soup.find('span', id="trueFinalResultCount") total_count = getValue(result.contents) print("Number of search results: %s" % (total_count)) search_count = soup.find('span', id='pageCount.top') page_count = getValue(search_count.contents) print("Number of search pages: %s" % (page_count)) page_links = soup.find('a', {'class': 'paginationNext', 'title': 'Next Page'}) page_link = page_links['href'] page_link = page_link[:len(page_link) - 1] i = 1 page_file = "/home/usha/python/log/page.txt" page_log = open(page_file, "w") result_file = "/home/usha/python/log/result.txt" result_log = open(result_file, "w") #while i <= 2:
# Steps to select timespan while True: print("\n=============================\n") print("1. Range Selection") print("2. Year Range") radio = raw_input("Select an option for timespan: ") div = soup.find('div', id="timespan") if int(radio) == 1: br.form.set_value(['Range Selection'], name="period") # Select range from drop down list ts_tags = div.find('input', {'name' : 'period'}).find_all('option') while True: print("\n=============================\n") for cnt, ts in enumerate(ts_tags): print("%s: %s" % (cnt+1, getValue(ts.contents))) print("\n=============================\n") choice = raw_input("Please select one of the above options: ") if int(choice) in [1, 2, 3, 4, 5, 6]: break else: print("Invalid choice, select again") timespan = get_Range(choice) control_select = br.form.find_control(type='select', name='range') # loop through drop down list items of timespan for each in control_select.items: if each.name == timespan: each.selected = True break break
# For every line of the result file ("...|url|..."), fetch the record page
# and pull DOI, accession number and citation counts into the global `log`.
# Python 2: `file(...)` is the builtin file constructor.
for line in file(result_file, "r"):
    DOI=""
    AN=""
    cited_ref_cnt=0
    citing_ref_cnt=0
    url = line.split('|')[1]
    f = urllib.urlopen(url)
    rsp = f.read()
    print(rsp, file=log)
    #### Beautifulsoup
    soup = BeautifulSoup(rsp, "html.parser")
    p_tags = soup.find_all('p', {'class' : 'FR_field'})
    #p_tags = p_tags[1:]
    for p in p_tags:
        try:
            # Field label is the text before the first ':'.
            span = getValue(p.span.contents)
            span = span.split(':')[0]
            if span == "DOI":
                DOI=getValue(p.value.contents)
            elif span == "Accession Number":
                AN=getValue(p.value.contents)
            elif span == "Cited References in Web of Science Core Collection":
                cited_ref_cnt=getValue(p.b.contents)
            elif span == "Times Cited in Web of Science Core Collection":
                citing_ref_cnt=getValue(p.b.contents)
        except AttributeError:
            # Field absent on this record; keep the default.
            pass
    print("DOI:%s AN:%s CNT1:%s CNT2:%s" % (DOI, AN, cited_ref_cnt, citing_ref_cnt), file=log)
    div = soup.find('div', {'class' : 'block-text-content'})
    print("\n===========================\n", file=log)
    print(div, file=log)
def set_Timespan():
    """Interactively set the search timespan on the current WoS form.

    Duplicate of the other ``set_Timespan`` in this file.  Offers two
    modes: (1) a predefined "Range Selection" drop-down, or (2) an
    explicit "Year Range" via start/end-year drop-downs.  Operates on the
    globals ``br`` and ``soup``; ``get_Range`` maps a menu choice to the
    name of a range-control item.  Loops until a valid option is applied.
    """
    while True:
        print("\n=============================\n")
        print("1. Range Selection")
        print("2. Year Range")
        radio = raw_input("Select an option for timespan: ")
        div = soup.find('div', id="timespan")
        if int(radio) == 1:
            br.form.set_value(['Range Selection'], name="period")
            # Select range from drop down list
            ts_tags = div.find('input', {'name' : 'period'}).find_all('option')
            while True:
                print("\n=============================\n")
                for cnt, ts in enumerate(ts_tags):
                    print("%s: %s" % (cnt+1, getValue(ts.contents)))
                print("\n=============================\n")
                choice = raw_input("Please select one of the above options: ")
                if int(choice) in [1, 2, 3, 4, 5, 6]:
                    break
                else:
                    print("Invalid choice, select again")
            timespan = get_Range(choice)
            control_select = br.form.find_control(type='select', name='range')
            # loop through drop down list items of timespan
            for each in control_select.items:
                if each.name == timespan:
                    each.selected = True
                    break
            break
        elif int(radio) == 2:
            br.form.set_value(['Year Range'], name="period")
            # Valid year bounds come from the page's own <option> lists.
            from_sel = div.find('select', {'name' : 'startYear'}).find_all('option')
            from_start = from_sel[0]['value']
            from_end = from_sel[len(from_sel)-1]['value']
            to_sel = div.find('select', {'name' : 'endYear'}).find_all('option')
            to_start = to_sel[0]['value']
            to_end = to_sel[len(to_sel)-1]['value']
            startYear = int(raw_input("Please select the Start year between %s and %s: " % (from_start, from_end)))
            endYear = int(raw_input("Please select the End year between %s and %s: " % (to_start, to_end)))
            #startYear=2010
            #endYear=2016
            control_start = br.form.find_control(type='select', name='startYear')
            # BUG FIX: mechanize item names are strings, but startYear and
            # endYear were converted with int(), so `each.name == startYear`
            # never matched and the year drop-downs were silently left
            # unset.  Compare as strings instead.
            for each in control_start.items:
                if each.name == str(startYear):
                    each.selected = True
                    break
            control_end = br.form.find_control(type='select', name='endYear')
            # loop through drop down list of endYear
            for each in control_end.items:
                if each.name == str(endYear):
                    each.selected = True
                    break
            break
        else:
            print("Invalid option selected, choose again")
# Fill the main search-term text control, then apply a timespan.
# `text` and `log` are globals bound outside this excerpt.
control_text = br.form.find_control(type='text', name="value(input1)")
control_text.value = text
# Steps to select timespan
while True:
    radio = 2
    # NOTE(review): `radio` is hard-coded to 2, yet only the radio == 1
    # branch is visible here.  Unless an elif for radio == 2 follows
    # outside this excerpt, this loop never breaks -- confirm against
    # the full file.
    div = soup.find('div', id="timespan")
    if int(radio) == 1:
        br.form.set_value(['Range Selection'], name="period")
        # Select range from drop down list
        ts_tags = div.find('input', {'name': 'period'}).find_all('option')
        while True:
            print("\n=============================\n", file=log)
            for cnt, ts in enumerate(ts_tags):
                print("%s: %s" % (cnt + 1, getValue(ts.contents)), file=log)
            print("\n=============================\n", file=log)
            choice = raw_input("Please select one of the above options: ")
            if int(choice) in [1, 2, 3, 4, 5, 6]:
                break
            else:
                print("Invalid choice, select again", file=log)
        timespan = get_Range(choice)
        control_select = br.form.find_control(type='select', name='range')
        # loop through drop down list items of timespan
        for each in control_select.items:
            if each.name == timespan:
                each.selected = True
                break
        break
# Bucket the current player (p, value v, position pos) into the owner's
# positional roster lists, then value the roster.
# NOTE(review): `pos`, `p`, `v`, `roster`, and `owner` are bound by an
# enclosing loop that starts before this excerpt; this `getValue` (returns
# a (start_val, bench_val) pair) differs from the scraping helper of the
# same name elsewhere in this file -- confirm against the full file.
if pos == "QB":
    roster['qb'].append((p, v))
elif pos == "RB":
    roster['rb'].append((p, v))
elif pos == "WR":
    roster['wr'].append((p, v))
elif pos == "TE":
    roster['te'].append((p, v))
# sort
# Ascending by player value (element index 1).
roster['qb'].sort(key=lambda e:e[1])
roster['rb'].sort(key=lambda e:e[1])
roster['wr'].sort(key=lambda e:e[1])
roster['te'].sort(key=lambda e:e[1])
start_val, bench_val = getValue(roster)
team_rosters[owner] = roster
team_start_val[owner] = start_val
team_bench_val[owner] = bench_val
team_info.append({
    'owner': owner,
    'start_val': start_val,
    'bench_val': bench_val
})
# print value info
# Negated key => descending by starting-lineup value.
team_info.sort(key=lambda e:-e['start_val'])
print("owner, start_val, bench_val")
# NOTE(review): rebinds `start_val` as a dict for code following this excerpt.
start_val = {}