def short_put_calendar_spread(code, month):
    """Print short put calendar spread candidates: each priced put expiring
    in *month* is paired with same-strike priced puts expiring one and two
    months later."""
    near_month = get_month(month, 1)
    far_month = get_month(month, 2)
    base = scrap(code, month)
    near = scrap(code, near_month)
    far = scrap(code, far_month)
    base_puts = base['option']['put']
    near_puts = near['option']['put']
    far_puts = far['option']['put']
    stock = base['stock']
    print('short_put_calendar_spread')
    print('stock_price: ' + str(stock.price))
    print(
        '\n---------------------------------------------------------------------------'
    )
    print(
        'Maximum when the stock price is further away from strike price on the nearby expiry date\n'
    )
    # NOTE(review): this calls long_put_calendar_spread_print even though the
    # strategy is the short variant -- looks like a copy/paste slip; confirm
    # whether a short_put_calendar_spread_print helper was intended.
    for put in base_puts:
        if put.get_price() == '':
            continue
        for later_put in near_puts:
            if later_put.get_price() != '' and later_put.strike == put.strike:
                long_put_calendar_spread_print(put, later_put, month, near_month)
        for later_put in far_puts:
            if later_put.get_price() != '' and later_put.strike == put.strike:
                long_put_calendar_spread_print(put, later_put, month, far_month)
def short_call_calendar_spread(code, month):
    """Print short call calendar spread candidates: each priced call expiring
    in *month* is paired with same-strike priced calls expiring one and two
    months later."""
    near_month = get_month(month, 1)
    far_month = get_month(month, 2)
    base = scrap(code, month)
    near = scrap(code, near_month)
    far = scrap(code, far_month)
    base_calls = base['option']['call']
    near_calls = near['option']['call']
    far_calls = far['option']['call']
    stock = base['stock']
    print('short_call_calendar_spread')
    print('stock_price: ' + str(stock.price))
    print(
        '\n---------------------------------------------------------------------------'
    )
    print(
        'Stock price is further away from strike price on the nearby expiry date\n'
    )
    for call in base_calls:
        if call.get_price() == '':
            continue
        for later_call in near_calls:
            if later_call.get_price() != '' and later_call.strike == call.strike:
                short_call_calendar_spread_print(call, later_call, month, near_month)
        for later_call in far_calls:
            if later_call.get_price() != '' and later_call.strike == call.strike:
                short_call_calendar_spread_print(call, later_call, month, far_month)
def test2(self):
    """An existing user with unscraped data should be scraped and have the
    scrape flag persisted."""
    scrap(self.username)
    config.cursor.execute(
        f"SELECT scrape,name,city FROM users WHERE username=\"{self.username}\""
    )
    self.credentials = config.cursor.fetchall()[0]
    # the scrape flag must be 1 after a successful scrape
    self.assertEqual(1, self.credentials[0])
def test4(self):
    """Scraping a user with no favourites should report that fact.

    Resets the user row first so scrap() has to re-scrape rather than
    render cached data.
    """
    query = "UPDATE users SET scrape=0,name=NULL,city=NULL WHERE username='swapnil.negi09'"
    config.cursor.execute(query)
    config.cnx.commit()
    text3 = io.StringIO()
    sys.stdout = text3
    try:
        scrap('swapnil.negi09')
    finally:
        # BUG FIX: stdout was never restored (unlike test3), which swallowed
        # the assertion output and broke any test running after this one.
        sys.stdout = sys.__stdout__
    self.assertEqual(text3.getvalue(), "There are no favourites\n")
def test3(self):
    """An already-scraped user should just have the stored data rendered."""
    scrap(self.username)  # first call scrapes and persists
    captured = io.StringIO()
    sys.stdout = captured
    scrap(self.username)  # second call should only print the cached data
    sys.stdout = sys.__stdout__
    self.assertEqual(captured.getvalue(),
                     'My name is Kanish and my current city is Roorkee\n')
def scrap_all(queries_file, results_count, out_dir):
    """Scrape SERP results for every query listed (one per line) in
    *queries_file*, writing each query's output under *out_dir*."""
    browser = SerpScraperYandex(None, None)
    browser.start()
    with open(queries_file, 'r') as handle:
        queries = [line.strip() for line in handle]
    for position, query in enumerate(queries):
        if position > 0:
            # randomized 5-10 s pause between queries to look less bot-like
            time.sleep(random.random() * 5 + 5)
        out_name = query.replace('/', ' ').replace(' ', '_')
        scrap(browser, query, results_count, os.path.join(out_dir, out_name))
    browser.close()
def __init__(self, code, month):
    """Fetch option chains for *month* and for the next two expiries,
    storing puts and calls for each, then defer to the base class."""
    self.month0 = month
    self.month1 = self.get_month(month, 1)
    self.month2 = self.get_month(month, 2)
    # One scrape per expiry, in chronological order.
    results = (scrap(code, month),
               scrap(code, self.month1),
               scrap(code, self.month2))
    self.puts0, self.puts1, self.puts2 = (r['option']['put'] for r in results)
    self.calls0, self.calls1, self.calls2 = (r['option']['call'] for r in results)
    super().__init__(code, month)
def updateDB():
    """Refresh the database from isis.jhu.edu for each term and focus area.

    For every focus area, the class numbers listed in input/<area>.txt are
    scraped via scrap(), which updates the database as a side effect.
    """
    for term in ["Fall 2015"]:
        print("")
        print(term)
        for focus_area in ["instrumentation", "celltissue", "systems",
                           "imaging", "computational"]:
            print("")
            print(focus_area)
            # IMPROVED: with-statement closes the file even if a scrape
            # raises (the original leaked the handle on exceptions) and
            # the C-style semicolons are gone.
            with open("input/" + focus_area + ".txt") as listing:
                for class_number in listing:
                    sec = section(class_number.strip(), term, focus_area)
                    scrap(sec)  # scrapes isis for data and updates the database
def bull_call_spread(code, month):
    """Print every bull call spread for *code*/*month*: long the lower-strike
    call, short each higher-strike call, with premium, break-even, max
    loss/win and their ratio."""
    result = scrap(code, month)
    stock = result['stock']
    print('stock_price: ' + str(stock.price))
    print(
        '\n---------------------------------------------------------------------------'
    )
    # Only calls with an actual quoted price participate.
    priced = [opt for opt in result['option']['call'] if opt.get_price() != '']
    for i, long_call in enumerate(priced):
        long_call.position = OptionPosition.LONG
        for short_call in priced[i + 1:]:
            short_call.position = OptionPosition.SHORT
            premium = float(long_call.get_price()) - float(short_call.get_price())
            loss_max = premium  # worst case: pay the net premium
            even_price = float(long_call.strike) + float(
                long_call.get_price()) - float(short_call.get_price())
            win_max = float(short_call.strike) - float(long_call.strike) - premium
            print_result([long_call, short_call], str(premium),
                         str(even_price), str(even_price), str(loss_max),
                         str(win_max), str(loss_max / win_max))
def synthetic_short_stock(code, month):
    """Print synthetic short stock candidates: short a call and long a put
    at the same strike, for every priced same-strike pair."""
    result = scrap(code, month)
    puts = result['option']['put']
    calls = result['option']['call']
    stock = result['stock']
    print('synthetic_short_stock')
    print('stock_price: ' + str(stock.price))
    print(
        '\n---------------------------------------------------------------------------'
    )
    for call in calls:
        if call.get_price() == '':
            continue
        for put in puts:
            if put.get_price() == '' or put.strike != call.strike:
                continue
            premium = float(call.get_price()) - float(put.get_price())
            even_price = float(put.strike) + float(premium)
            loss_max = 'any above ' + str(even_price)
            win_max = 'any below ' + str(even_price)
            call.position = OptionPosition.SHORT
            put.position = OptionPosition.LONG
            print_result([call, put], str(premium), str(even_price),
                         str(-even_price), str(loss_max), str(win_max),
                         str('N/A'))
def main(url, headers, cookies, BL, TP, SP, TA):
    """Crawl every publisher linked from the page's fourth dropdown menu,
    following "next page" pagination and threading the accumulated scrape
    state through scrap() on each page."""
    response = requests.get(url, headers=headers, cookies=cookies)
    page_soup = soup(response.text, "html.parser")
    publisher_menu = page_soup.find_all("ul", {"class": "dropdown-menu"})[3]
    index = index_BL = index_TP = index_SP = index_TA = 0
    BLws = TPws = SPws = TAws = ''
    for anchor in publisher_menu.find_all('a'):
        penerbit = ' '.join(anchor.text.split())  # collapse whitespace in the name
        url = anchor.get('href')
        while True:
            print(penerbit)
            response = requests.get(url, headers=headers, cookies=cookies)
            (index, index_BL, index_TP, index_SP, index_TA,
             BLws, TPws, SPws, TAws) = scrap(
                 index, index_BL, index_TP, index_SP, index_TA, response,
                 BL, BLws, TP, TPws, SP, SPws, TA, TAws, penerbit)
            page_soup = soup(response.text, "html.parser")
            next_page = page_soup.findAll("li", {"class": "next page"})
            if not next_page:
                break
            url = next_page[0].find_all('a')[0].get('href')
def long_strangle(code, month):
    """Print long strangle candidates: buy a call and a lower-strike put;
    loss is capped at the paid premium, profit grows outside the band."""
    result = scrap(code, month)
    calls = result['option']['call']
    puts = result['option']['put']
    stock = result['stock']
    print('long_strangle')
    print('stock_price: ' + str(stock.price))
    print(
        '\n---------------------------------------------------------------------------'
    )
    for call in calls:
        if call.get_price() == '':
            continue
        for put in puts:
            if put.get_price() == '' or not call.strike > put.strike:
                continue
            premium = float(call.get_price()) + float(put.get_price())
            lower_limit = float(put.strike) - float(premium)
            upper_limit = float(call.strike) + float(premium)
            even_price = str(lower_limit) + ' to ' + str(upper_limit)
            win_max = ('any below: ' + str(lower_limit) +
                       ' and any above: ' + str(upper_limit))
            loss_max = premium
            call.position = OptionPosition.LONG
            put.position = OptionPosition.LONG
            print_result([call, put], str(premium), str(even_price),
                         str(even_price), str(loss_max), str(win_max),
                         str('N/A'))
def short_strangle(code, month):
    """Print short strangle candidates: sell a call and a lower-strike put;
    profit is capped at the collected premium, losses grow outside the
    break-even band."""
    result = scrap(code, month)
    calls = result['option']['call']
    puts = result['option']['put']
    stock = result['stock']
    print('short_strangle')
    print('stock_price: ' + str(stock.price))
    print(
        '\n---------------------------------------------------------------------------'
    )
    for call in calls:
        if call.get_price() == '':
            continue
        for put in puts:
            if put.get_price() == '' or not call.strike > put.strike:
                continue
            premium = float(call.get_price()) + float(put.get_price())
            win_max = premium
            lower_limit = float(put.strike) - float(premium)
            upper_limit = float(call.strike) + float(premium)
            even_price = str(lower_limit) + ' to ' + str(upper_limit)
            loss_max = ('any below: ' + str(lower_limit) +
                        ' and any above: ' + str(upper_limit))
            print('short call : ' + str(call))
            print('short put: ' + str(put))
            print('premium: ' + str(premium))
            print('break even: ' + str(even_price))
            print('max_loss: ' + str(loss_max))
            print('max_win: ' + str(win_max))
            print('\n')
def links():
    """Return the absolute URLs of every page listed in the pagination bar
    of the constellation-myth resource index."""
    BASE_URL = 'https://www.lcsd.gov.hk/CE/Museum/Space/zh_CN/web/spm/starshine/resources/constemyth/'
    url_bs = 'https://www.lcsd.gov.hk/CE/Museum/Space/zh_CN/web/spm/starshine/resources/constemyth/chinengstars.html'
    soup = scrap(url_bs)
    pagination = soup.find('div', class_="pagination").find_all('a')
    # hrefs in the pagination bar are relative, so prefix the base URL
    return [BASE_URL + anchor['href'] for anchor in pagination]
def __init__(self, code, month=None):
    """Load data for *code*: just the underlying stock when *month* is None,
    otherwise the full option chain for that expiry plus derived pairs."""
    self.code = code
    self.month = month
    if month is None:
        # No expiry requested -- only the underlying stock is fetched.
        self.stock = get_stock(code)
        return
    result = scrap(code, month)
    self.put_options = result['option']['put']
    self.call_options = result['option']['call']
    self.stock = result['stock']
    self.pairs = self.get_pairs()
def long_put_calendar_spread(code, month):
    """Print long put calendar spread candidates: each priced put expiring
    in *month* is paired with same-strike priced puts expiring one and two
    months later."""
    near_month = get_month(month, 1)
    far_month = get_month(month, 2)
    base = scrap(code, month)
    near = scrap(code, near_month)
    far = scrap(code, far_month)
    base_puts = base['option']['put']
    near_puts = near['option']['put']
    far_puts = far['option']['put']
    stock = base['stock']
    print('long_put_calendar_spread')
    print('stock_price: ' + str(stock.price))
    for put in base_puts:
        if put.get_price() == '':
            continue
        for later_put in near_puts:
            if later_put.get_price() != '' and later_put.strike == put.strike:
                long_put_calendar_spread_print(put, later_put, month, near_month)
        for later_put in far_puts:
            if later_put.get_price() != '' and later_put.strike == put.strike:
                long_put_calendar_spread_print(put, later_put, month, far_month)
def duedil_company_search(company_name, duedil_api_key):
    """Search DueDil for *company_name* and return the company profile dict.

    Returns False when the search fails, matches nothing, or the profile
    fetch fails. Requires scrap().
    """
    # DueDil matches lowercase names; spaces must be URL-encoded.
    clean_company_name = company_name.lower().replace(' ', '%20')
    search_url = ('http://duedil.io/v3/companies?filters={"name":"'
                  + clean_company_name + '"}&api_key=' + duedil_api_key)
    search_response = scrap(search_url)
    if not search_response:
        return False
    # BUG FIX: the original indexed ["data"][0] unconditionally and raised
    # IndexError (or KeyError) whenever the search matched no companies.
    try:
        company_url_root = search_response["response"]["data"][0]["company_url"]
    except (KeyError, IndexError, TypeError):
        return False
    company_url = company_url_root + '?api_key=' + duedil_api_key + '&format=json'
    # (the directors endpoint was fetched into an unused variable; dropped)
    profile_response = scrap(company_url)
    if profile_response and 'response' in profile_response:
        return profile_response['response']
    return False
def report():
    """Render the job-search report for the space-separated terms passed in
    the ?term= query parameter, caching scrape results in db."""
    raw_terms = request.args.get("term")
    if raw_terms == "":
        return redirect("/")
    multiple_term = []
    for term in raw_terms.split(" "):
        print(term)
        if term == "":
            continue
        # '+' must survive as %2B when the term is echoed into links
        string_plus = term.replace("+", "%2B")
        term = term.lower()
        history = db.get(term)
        if history:
            result = db[term]
        else:
            result = scrap(term)
            db[term] = result
        color = coloring(term)
        multiple_term.append({
            "wework": result["wework"],
            "stack": result["stack"],
            "remote": result["remote"],
            "total": result["total"],
            "term": term,
            "sub_term_for_plus": string_plus,
            "color": color
        })
    return render_template("report.html",
                           terms_list=multiple_term,
                           raw_terms=raw_terms)
def long_call_calendar_spread(code, month):
    """Print long call calendar spread candidates: each priced call expiring
    in *month* is paired with same-strike priced calls expiring one and two
    months later."""
    near_month = get_month(month, 1)
    far_month = get_month(month, 2)
    base = scrap(code, month)
    near = scrap(code, near_month)
    far = scrap(code, far_month)
    base_calls = base['option']['call']
    near_calls = near['option']['call']
    far_calls = far['option']['call']
    stock = base['stock']
    print('long_call_calendar_spread')
    print('stock_price: ' + str(stock.price))
    print(
        '\n---------------------------------------------------------------------------'
    )
    for call in base_calls:
        if call.get_price() == '':
            continue
        for later_call in near_calls:
            if later_call.get_price() != '' and later_call.strike == call.strike:
                long_call_calendar_spread_print(call, later_call, month, near_month)
        for later_call in far_calls:
            if later_call.get_price() != '' and later_call.strike == call.strike:
                long_call_calendar_spread_print(call, later_call, month, far_month)
def bing_the_query_field(query_field, bing_api_key, n_results):
    """Query the Bing web-search API for *query_field* and return at most
    *n_results* result URLs (empty list on any missing response field)."""
    bing_search_url = ('https://api.datamarket.azure.com/Data.ashx/Bing/Search/Web?Query='
                       + query_field + '&$format=json')
    bing_response = scrap(bing_search_url, bing_api_key)
    list_of_url = []
    if bing_response and 'd' in bing_response and 'results' in bing_response['d']:
        list_of_url = [item['Url'] for item in bing_response['d']['results']]
    return list_of_url[:min(len(list_of_url), n_results)]
def update(self):
    """Re-scrape the stock data and return a JSON status/message payload
    describing how many records were updated (status 0 on failure)."""
    count = scrap()
    if count:
        payload = {
            "status": 1,
            "message": "{} stock records fetched and updated.".format(count),
        }
    else:
        payload = {
            "status": 0,
            "message": "Failed to update stock data",
        }
    return json.dumps(payload)
class SimpleTest(unittest.TestCase):
    """Tests for scrap() and the Person model.

    unittest runs methods in alphabetical order, so test() (which populates
    the module-level ``t``) runs before the other test_* methods.
    """

    def test(self):
        # BUG FIX: the original used assertTrue(x, expected) everywhere --
        # the second argument of assertTrue is the failure *message*, so
        # nothing was ever compared. Equality checks now use assertEqual.
        self.assertTrue(usee())
        # BUG FIX: without the global declaration, t was a method-local and
        # the later test_* methods read an undefined/stale global instead.
        global t
        t = scrap('ayushjainaj20', nams, citys, works, favs, foun)

    def test_name(self):
        self.assertEqual(t[0], 'Ayush Jain')

    def test_city(self):
        self.assertEqual(t[1], 'Roorkee')

    def test_work(self):
        self.assertEqual(t[2],
                         ['Cinematic Section,IIT Roorkee', 'IMG, IIT Roorkee'])

    def test_fav(self):
        self.assertEqual(
            t[3], {
                'Other': [
                    'Dhruv Sleeping in Unusual Places', 'Gaurav V/S Mankind',
                    'Cheerful Nihilism', 'hitesxh ヤング・ウチハ',
                    'Emotional Introvert',
                    "Students' Affairs Council, IIT Roorkee",
                    'Choreography and Dance section IIT Roorkee',
                    'Data Science Group, IITR', 'Enactus IIT Roorkee',
                    'IMG, IIT Roorkee', 'Sarcasm', 'Iron Man - The Hustler',
                    'Kya-Gift-Du', 'Feynman Fans',
                    'ISRO - Indian Space Research Organisation',
                    'National Geographic TV', 'and more'
                ]
            })

    def test_person(self):
        self.assertEqual(person.name, 'Ayush Jain')

    def test_person2(self):
        self.assertEqual(person.city, 'Roorkee')

    def test_person3(self):
        with self.assertRaises(MyException):
            Person('Ayush Jain', city='Roorkee', works='vwed')
def web_scrap(self):
    """Validate the entered URL, scrape and summarize the article, and save
    the summary to a file chosen through a save dialog.

    Shows an error dialog (in Russian, matching the rest of the UI) and
    returns early on: empty input, malformed URL, or failed download.
    """
    url = self.url_full.text()
    if not url:
        error_dialog = QtWidgets.QErrorMessage()
        error_dialog.showMessage(
            'Пожалуйста, напишите или вставьте ссылку на сайт')
        error_dialog.exec_()
        return
    if not re.match(regex, url):
        error_dialog = QtWidgets.QErrorMessage()
        error_dialog.showMessage(
            'Введённая вами ссылка не является URL-адресом или не полная.'
            ' Пожалуйста, введите или вставьте полную ссылку на сайт')
        error_dialog.exec_()
        return
    article = scrap.scrap(url)
    if not article:
        error_dialog = QtWidgets.QErrorMessage()
        error_dialog.showMessage(
            'Пожалуйста, проверьте интернет соединение')
        error_dialog.exec_()
        return
    language = language_detect.detect_language(article)
    text, format_text = scrap.format_text(article, language)
    result = scrap.summarize(text, format_text, language, self.sent_count)
    name = QtWidgets.QFileDialog.getSaveFileName(
        self, "Save File", None, "All Files (*);;Text Files (*.txt)")
    if not name[0]:
        return  # user cancelled the save dialog
    # IMPROVED: context manager guarantees the file is closed even if the
    # write raises; str.join replaces the quadratic += string build.
    with open(name[0], 'w') as file:
        file.write("".join("{}\n".format(s) for s in result))
def short_call_w_asset(code, month):
    """Covered call: print the payoff profile of shorting each priced call
    while holding the underlying stock."""
    result = scrap(code, month)
    stock = result['stock']
    print('stock_price: ' + str(stock.price))
    print(
        '\n---------------------------------------------------------------------------'
    )
    for option in result['option']['call']:
        if option.get_price() == '':
            continue
        loss_max = VAL_UNLIMIT  # the stock can fall arbitrarily far
        premium = (float(stock.price) - float(option.strike))
        even_price = float(stock.price) - float(option.get_price())
        execute_at = option.strike
        win_max = float(option.get_price()) - (float(stock.price) -
                                               float(option.strike))
        print('execute_at: > ' + str(execute_at))
        print_result(option, str(premium), str(even_price), str(even_price),
                     loss_max, str(win_max), VAL_UNLIMIT)
def star_bs():
    """Scrape the bright-star tables from every pagination link and write
    assets/stars.csv with top-20 / commonly-used flags derived from the
    '*' / '**' suffixes on the star names."""
    rows = []
    for link in links():
        soup = scrap(link)
        for table in soup.find_all('table', class_='table_space'):
            for tr in table.tbody.find_all('tr')[1:]:
                rows.append([td.text for td in tr.find_all('td')])
    # BUG FIX: the original column list had no 'name_with_suffix' entry, so
    # every df['name_with_suffix'] access below raised KeyError. The name
    # column carries the '*'/'**' suffixes, so it is labelled
    # name_with_suffix here and the clean 'name' is derived from it.
    # NOTE(review): which scraped column holds the suffixed name cannot be
    # confirmed from this file alone -- verify against the live table.
    column_names = ['name_en', 'num', 'name_with_suffix', 'name_cn', 'ra',
                    'other']
    df = pd.DataFrame(data=rows, columns=column_names)
    df['top20'] = np.where(df['name_with_suffix'].str.endswith('**'), 'yes',
                           'no')
    # NOTE(review): endswith('*') is also true for names ending in '**', so
    # top-20 stars are marked commonly_used too -- confirm that is intended.
    df['commonly_used'] = np.where(df['name_with_suffix'].str.endswith('*'),
                                   'yes', 'no')
    df['name'] = [s.split(r' *', 1)[0].strip()
                  for s in df['name_with_suffix']]
    df['name_cn'] = [s.replace(' ', '') for s in df['name_cn']]
    df.to_csv('assets/stars.csv')
def star_ch():
    """Scrape the constellation list table from the wiki mirror and write
    assets/cons.csv, splitting the brightest-star column into its Bayer
    designation and Chinese name."""
    url = 'https://bk.tw.lvfukeji.com/wiki/%E6%98%9F%E5%BA%A7%E5%88%97%E8%A1%A8'
    soup = scrap(url)
    table = soup.find("table", class_='wikitable sortable')
    # Raw header texts from the first table row (replaced wholesale below).
    column_names = [th.text for th in table.tbody.tr.find_all('th')]
    rows = []
    for tr in table.tbody.find_all('tr')[1:]:  # skip the header row
        row = []
        for td in tr.find_all('td'):
            row.append(td.text)
        rows.append(row)
    df = pd.DataFrame(rows, columns=column_names)
    # Normalize to ASCII column names in table order.
    df.columns = [
        'name_cn', 'abbr', 'name', 'area', 'ra', 'dec', 'quadrant', 'family',
        'bs'
    ]
    df.set_index('name', inplace=True)
    # Drop trailing newlines scraped along with the bright-star cell text.
    df.bs = df.bs.apply(lambda x: x.strip('\n'))
    # Split "bayer(chinese)" into two named columns; rows that don't match
    # the pattern yield NaN in both.
    df1 = df.bs.str.extract(r'(?P<bs_name_bayer>.*)\((?P<bs_name_cn>.*)\)')
    # df1 keeps df's 'name' index, so the merge aligns the extracted
    # columns back onto the original frame.
    df_ch = pd.merge(df, df1, on='name')
    df_ch.to_csv('assets/cons.csv')
def Scrapshow(self):
    """Close the current window and open the scrap window in its place."""
    self.close()
    # local import preserved from the original code
    from scrap import scrap
    self.Scrapwindow = scrap()
    self.Scrapwindow.show()
def on_post(self, req, resp):
    """Falcon POST handler: run the scraper and return its JSON payload
    with a 200 status."""
    payload = scrap.scrap()
    resp.status = falcon.HTTP_200
    resp.body = payload
# Fetch the remaining "love" playlists from Melon and merge them with the
# earlier love_1..love_5 lists into love.json.
# NOTE(review): playlists 6 and 7 use the same plylstSeq (455324377), so one
# playlist is fetched twice -- looks like a copy/paste slip; confirm the
# intended id for playlist 7. Left unchanged here.
love6_html = requests.get(
    'https://www.melon.com/mymusic/dj/mymusicdjplaylistview_inform.htm?plylstSeq=455324377',
    headers=header).text
love6_bs = BeautifulSoup(love6_html, "html.parser")
love_6 = love6_bs.select('tr')[1:]  # first <tr> is the table header
love7_html = requests.get(
    'https://www.melon.com/mymusic/dj/mymusicdjplaylistview_inform.htm?plylstSeq=455324377',
    headers=header).text
love7_bs = BeautifulSoup(love7_html, "html.parser")
love_7 = love7_bs.select('tr')[1:]
love8_html = requests.get(
    'https://www.melon.com/mymusic/dj/mymusicdjplaylistview_inform.htm?plylstSeq=430470805',
    headers=header).text
love8_bs = BeautifulSoup(love8_html, "html.parser")
love_8 = love8_bs.select('tr')[1:]
love9_html = requests.get(
    'https://www.melon.com/mymusic/dj/mymusicdjplaylistview_inform.htm?plylstSeq=456337994',
    headers=header).text
love9_bs = BeautifulSoup(love9_html, "html.parser")
love_9 = love9_bs.select('tr')[1:]
love = scrap(love_1, 'love') + scrap(love_2, 'love') + scrap(love_3, 'love')\
    + scrap(love_4, 'love') + scrap(love_5, 'love') + scrap(love_6, 'love')\
    + scrap(love_7, 'love') + scrap(love_8, 'love') + scrap(love_9, 'love')
# BUG FIX: the file was opened but never closed; the with-statement
# guarantees the JSON is flushed and the handle released.
with open("love.json", "w+") as love_file:
    love_file.write(json.dumps(love))
print('love finish')
NAME TEXT NOT NULL, QUESTS INT NOT NULL );""") url_file = open('urls.txt', 'r') urls = [] for i in url_file.readlines(): urls.append(i[:-1]) total = len(urls) tmp = 0 fail = 0 for i in urls: try: x = scrap.scrap(i) # Get the id by removing the first part of the url id = re.split('public_profiles/', i)[1] # Get the cursor cur = conn.cursor() # Check if the id exists in the database cur.execute("SELECT * FROM PARTICIPANTS where ID=?", (id, )) res = cur.fetchone() # If it exists, update the value if (res): conn.execute("UPDATE PARTICIPANTS SET QUESTS=? WHERE ID=?", (len(x["completed_quests"]), id)) else: conn.execute("INSERT INTO PARTICIPANTS VALUES(?,?,?)",
# Fetch the Melon steady-seller top-50 lists for the remaining genres
# (gnrCode GN0500=indie, GN0600=rock, GN0700=trot, GN0800=folk), merge them
# with the earlier genre lists, and dump everything to JSON.
indie_html = requests.get(
    'https://www.melon.com/genre/song_list.htm?gnrCode=GN0500&steadyYn=Y',
    headers=header).text
indie_bs = BeautifulSoup(indie_html, "html.parser")
indie50 = indie_bs.select('tr')[1:]  # first <tr> is the table header
# rock
rock_html = requests.get(
    'https://www.melon.com/genre/song_list.htm?gnrCode=GN0600&steadyYn=Y',
    headers=header).text
rock_bs = BeautifulSoup(rock_html, "html.parser")
rock50 = rock_bs.select('tr')[1:]
# trot
trot_html = requests.get(
    'https://www.melon.com/genre/song_list.htm?gnrCode=GN0700&steadyYn=Y',
    headers=header).text
trot_bs = BeautifulSoup(trot_html, "html.parser")
trot50 = trot_bs.select('tr')[1:]
# folk
folk_html = requests.get(
    'https://www.melon.com/genre/song_list.htm?gnrCode=GN0800&steadyYn=Y',
    headers=header).text
folk_bs = BeautifulSoup(folk_html, "html.parser")
folk50 = folk_bs.select('tr')[1:]
steady_seller_all = scrap(ballad50) + scrap(dance50) + scrap(rap50) \
    + scrap(rnb50) + scrap(indie50) + scrap(rock50) + scrap(trot50) \
    + scrap(folk50)
# BUG FIX: the file was opened but never closed; the with-statement
# guarantees the JSON is flushed and the handle released.
# NOTE(review): the filename "moviedata.json" looks odd for music data --
# confirm it is intentional. Left unchanged here.
with open("moviedata.json", "w+") as out_file:
    out_file.write(json.dumps(steady_seller_all))
print('finish')
def get_put_options(code: str, month: str) -> List[Option]:
    """Return the put-option chain for *code* expiring in *month*."""
    return scrap(code, month)['option']['put']
def get_call_options(code: str, month: str) -> List[Option]:
    """Return the call-option chain for *code* expiring in *month*."""
    return scrap(code, month)['option']['call']