def request_keyword(self, keyword):
    """Request the search page for ``keyword`` through a local SOCKS5 proxy.

    The call sleeps for one second, picks a random port from
    ``self.socks_port_list`` and requests ``self.search_url`` formatted
    with ``keyword``. On an HTTP 200 reply the body is parsed with
    ``self.parse_result`` and stored through ``save_json`` under
    ``self.download_path``, using ``keyword`` as the file name.

    Returns:
        The response object when the status code is 200; ``None`` for any
        other status code or when ``requests.RequestException`` is raised.
    """
    try:
        # Fixed one-second pause before every request.
        time.sleep(1)

        port = random.choice(self.socks_port_list)
        proxy_url = 'socks5://127.0.0.1:{}'.format(port)
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
            'Referer': 'https://www.google.com',
        }
        target = self.search_url.format(keyword)

        # The same proxy is used for both plain and TLS traffic.
        response = requests.get(
            target,
            headers=request_headers,
            proxies={'http': proxy_url, 'https': proxy_url},
            timeout=3
        )
        print("Requested {}".format(target))

        # Anything other than 200 is reported to the caller as "no result".
        if response.status_code != 200:
            return None

        result = self.parse_result(keyword, response.text)
        save_json(download_dirpath=self.download_path,
                  filename=keyword,
                  data=result)
        print("Saved {}".format(keyword))
        return response
    except requests.RequestException as err:
        print("ERROR request_keyword {}".format(keyword))
        print(err)
        return None
def get_msg_score(email, token, use_psync):
    """Collect per-week message statistics for four windows and save them.

    For each window ``i`` in ``0..3`` the newer bound is the current
    timestamp (``i == 0``) or ``helper.get_old_time_stamp(i * 7)``, and the
    older bound is ``helper.get_old_time_stamp((i + 1) * 7)`` (the argument
    is presumably a number of days -- confirm against ``helper``).

    Three statistics are gathered per window and written as JSON:

    * ``sent_last.json``    -- ``get_msg_count_in_range(..., 'from', ...)``
    * ``receive_last.json`` -- ``get_msg_count_in_range(..., 'to', ...)``
    * ``unread_last.json``  -- ``get_thread_participant_score_with_tags``
      with the ``'unread'`` tag

    Args:
        email: Forwarded unchanged to the counting helpers.
        token: Forwarded unchanged to the counting helpers.
        use_psync: Forwarded unchanged to the counting helpers.

    Returns:
        None. The results are only persisted through ``helper.save_json``.
    """
    upto_weeks = 4
    sent_stat = [None] * upto_weeks
    receive_stat = [None] * upto_weeks
    unread_stat = [None] * upto_weeks

    # range() rather than xrange(): xrange does not exist on Python 3, and
    # for four iterations the two behave identically on Python 2.
    for i in range(upto_weeks):
        if i == 0:
            future_time_stamp = helper.get_current_time_stamp()
        else:
            future_time_stamp = helper.get_old_time_stamp(i * 7)
        past_time_stamp = helper.get_old_time_stamp((i + 1) * 7)

        sent_stat[i] = get_msg_count_in_range(
            future_time_stamp, past_time_stamp, email, token, 'from',
            use_psync)
        receive_stat[i] = get_msg_count_in_range(
            future_time_stamp, past_time_stamp, email, token, 'to',
            use_psync)
        unread_stat[i] = get_thread_participant_score_with_tags(
            future_time_stamp, past_time_stamp, token, 'unread', email,
            use_psync)

    helper.save_json('sent_last.json', sent_stat)
    helper.save_json('receive_last.json', receive_stat)
    helper.save_json('unread_last.json', unread_stat)
    return
def update_fighters():
    """Add fighter links from the bestfightodds front page to the stored list.

    Loads the JSON list at ``self.fighter_lst_address``, downloads
    ``self.bestfightOddsUrl``, appends every ``span``/``a`` ``href`` that
    matches ``fighters`` and is not yet in the list, saves the list back
    with ``h.save_json`` and finally points ``self.fighter_lst_address`` at
    a file name carrying today's date.

    Intended to be run about once a week, or whenever new fighter IDs
    should be picked up from the front page.
    """
    # NOTE(review): the body uses ``self`` but the signature declares no
    # parameters; this looks like a method that lost its ``self`` argument.
    # The signature is left untouched here -- confirm against the callers.

    # Local import so the block does not depend on the file's import list.
    import os

    with open(self.fighter_lst_address, 'r') as fp:
        fighter_odds_lst = json.load(fp)

    # Pull the front page.
    r = requests.get(self.bestfightOddsUrl)
    soup = bs.BeautifulSoup(r.content, 'lxml')
    for a in soup.find_all(['span', 'a'], href=re.compile('fighters')):
        # Only keep fighter links that are not in the list yet.
        if a['href'] not in fighter_odds_lst:
            fighter_odds_lst.append(a['href'])

    c = str(datetime.date.today())

    # Fixed: the attribute was misspelled (``fighter_lst_addres``) and the
    # saved name (``fighter_lst``) was never defined.
    # NOTE(review): the list is written to the current path and only then
    # is the path switched to the dated file, so the dated file is not
    # written here -- confirm this ordering is intended.
    h.save_json(self.fighter_lst_address, fighter_odds_lst)

    # Fixed: the literal "data\f..." contained a form-feed escape (\f)
    # instead of a path separator.
    self.fighter_lst_address = os.path.join(
        "data", "fighter_odds_%s.json" % c)
def compute_primitive_score(email, use_psync):
    """Compute a per-contact score from the saved weekly statistics.

    Reads ``sent_last.json``, ``receive_last.json`` and
    ``unread_last.json`` through ``helper.load_json``. For every contact
    returned by ``get_recent_contact`` the score is::

        weighted(sent) + weighted(received) - weighted(unread)

    where the first four entries of each statistic are weighted
    ``4, 3, 2, 1`` for sent/received and ``0, 3, 2, 1`` for unread (the
    first unread entry therefore contributes nothing).

    The mapping is written to ``score.json`` and returned. ``email`` and
    ``use_psync`` are accepted but not used by this function.
    """
    sent_weeks = helper.load_json('sent_last.json')
    received_weeks = helper.load_json('receive_last.json')
    unread_weeks = helper.load_json('unread_last.json')

    traffic_weights = (4, 3, 2, 1)
    unread_weights = (0, 3, 2, 1)

    def weighted_total(weeks, weights, who):
        # Index explicitly so a statistic with fewer than four entries
        # still raises IndexError instead of being silently truncated.
        return sum(
            weight * get_count_if_exist_else_zero(weeks[idx], who)
            for idx, weight in enumerate(weights)
        )

    score = {}
    contacts = get_recent_contact([sent_weeks, received_weeks, unread_weeks])
    for contact in contacts:
        sent_part = weighted_total(sent_weeks, traffic_weights, contact)
        received_part = weighted_total(received_weeks, traffic_weights,
                                       contact)
        unread_part = weighted_total(unread_weeks, unread_weights, contact)
        score[contact] = sent_part + received_part - unread_part

    helper.save_json('score.json', score)
    return score
def compute_primitive_score(email, use_psync):
    """Score every recent contact from the stored weekly statistics.

    The three statistics are loaded with ``helper.load_json`` from
    ``sent_last.json``, ``receive_last.json`` and ``unread_last.json``.
    Each contact from ``get_recent_contact`` is scored as weighted sent
    plus weighted received minus weighted unread, using the first four
    entries of each statistic. Sent and received use the weights
    ``4, 3, 2, 1``; unread uses ``0, 3, 2, 1``.

    The resulting dict is saved as ``score.json`` and returned. The
    ``email`` and ``use_psync`` arguments are not read here.
    """
    stats_sent = helper.load_json('sent_last.json')
    stats_received = helper.load_json('receive_last.json')
    stats_unread = helper.load_json('unread_last.json')

    def tally(stat, weights, contact):
        # Explicit indexing keeps the IndexError of the four-term formula
        # when a statistic holds fewer than four entries.
        return sum(
            factor * get_count_if_exist_else_zero(stat[week], contact)
            for week, factor in enumerate(weights)
        )

    def contact_score(contact):
        sent_score = tally(stats_sent, (4, 3, 2, 1), contact)
        receive_score = tally(stats_received, (4, 3, 2, 1), contact)
        # The first unread entry carries weight 0.
        unread_score = tally(stats_unread, (0, 3, 2, 1), contact)
        return sent_score + receive_score - unread_score

    recent = get_recent_contact([stats_sent, stats_received, stats_unread])
    score = {contact: contact_score(contact) for contact in recent}

    helper.save_json('score.json', score)
    return score
def get_msg_score(email, token, use_psync):
    """Gather sent, received and unread statistics for four windows.

    Window ``i`` (``0..3``) runs from ``helper.get_old_time_stamp((i + 1) * 7)``
    up to ``helper.get_old_time_stamp(i * 7)``; for ``i == 0`` the upper
    bound is ``helper.get_current_time_stamp()``. The argument of
    ``get_old_time_stamp`` is presumably a day count -- confirm against
    ``helper``.

    The per-window results are saved with ``helper.save_json`` to
    ``sent_last.json``, ``receive_last.json`` and ``unread_last.json``.

    Args:
        email: Passed through to the counting helpers.
        token: Passed through to the counting helpers.
        use_psync: Passed through to the counting helpers.

    Returns:
        None.
    """
    upto_weeks = 4
    sent_stat = [None] * upto_weeks
    receive_stat = [None] * upto_weeks
    unread_stat = [None] * upto_weeks

    # xrange() was replaced by range(): xrange is undefined on Python 3,
    # and range() iterates the same four values on Python 2.
    for i in range(upto_weeks):
        if i == 0:
            future_time_stamp = helper.get_current_time_stamp()
        else:
            future_time_stamp = helper.get_old_time_stamp(i * 7)
        past_time_stamp = helper.get_old_time_stamp((i + 1) * 7)

        # 'from' selects sent messages, 'to' selects received messages.
        sent_stat[i] = get_msg_count_in_range(
            future_time_stamp, past_time_stamp, email, token, 'from',
            use_psync)
        receive_stat[i] = get_msg_count_in_range(
            future_time_stamp, past_time_stamp, email, token, 'to',
            use_psync)
        unread_stat[i] = get_thread_participant_score_with_tags(
            future_time_stamp, past_time_stamp, token, 'unread', email,
            use_psync)

    helper.save_json('sent_last.json', sent_stat)
    helper.save_json('receive_last.json', receive_stat)
    helper.save_json('unread_last.json', unread_stat)
    return
temples['imgUrl_5'] = xls.GetSheetValue(row, col_imgUrl_5) temples['place'] = xls.GetSheetValue(row, col_place) temples['surPlace'] = helper.remove_substring( xls.GetSheetValue(row, col_place)) temples['longBrief'] = helper.capitalize_first( xls.GetSheetValue(row, col_longBrief)) \ .replace('\t', ' ').replace(' ', ' ') temples['srcUrl'] = xls.GetSheetValue(row, col_srcUrl) temples['eparchyUrl'] = xls.GetSheetValue(row, col_eparchyUrl) temples['abbots'] = helper.capitalize_first( xls.GetSheetValue(row, col_abbots)) entities.append(temples) except Exception as e: print(f'Exception input line in row {row}: {temples}') raise Exception(e) helper.clear_folder(helper.get_full_path(root_folder)) helper.create_folder(helper.get_full_path(root_folder)) for i in range(len(entities)): item = entities[i] filename = 'file{}.json'.format(i) filename = helper.get_full_path(os.path.join(root_folder, filename)) helper.save_json(item, filename) print("Completed read to json files") exit(0)
def save(self):
    """Write this object's settings out via ``save_json`` and confirm it.

    ``save_json`` receives ``self.database_loc``, ``self.settings`` and the
    instance itself, in that order; afterwards ``Saved.`` is printed.
    """
    location = self.database_loc
    settings = self.settings
    # The instance itself is passed as the third positional argument.
    save_json(location, settings, self)
    print('Saved.')