def patch(self, id):
    try:
        measurement = Measurement.objects(id=id).first()
        if measurement is not None:
            # Only measurements created today may be updated.
            if get_formatted_date(measurement.created) != get_formatted_date(get_today_date()):
                raise BadRequest(
                    f'Cannot update a measurement for {get_formatted_date(measurement.created)}'
                )
            data = self.reqparse.parse_args()
            data = normalize_data(data)
            measurement.update(**data)
            measurement.reload()
            return measurement.to_dict(), 200
        abort(404, message=f'Measurement ID={id} was not found')
    except BadRequest as e:
        app.logger.error(e)
        raise
    except NotFound as e:
        app.logger.error(e)
        raise
    except Exception as e:
        app.logger.error(e)
        abort(500, message=str(e))
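# The measurement handlers in this section compare formatted dates to restrict
# updates to same-day records. The helpers get_today_date and get_formatted_date
# come from the project's utils and are not shown here; a minimal sketch of what
# they might look like, assuming datetime values and an ISO day format (both
# assumptions, not the project's actual implementation):
from datetime import datetime

def get_today_date():
    # Assumed implementation: the handlers only need a value that
    # get_formatted_date can collapse to a calendar day.
    return datetime.utcnow()

def get_formatted_date(value):
    # Collapse a datetime to its calendar day so two timestamps taken
    # on the same day compare equal.
    return value.strftime('%Y-%m-%d')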
def get(self):
    slug = self.get_argument("slug", None)
    tags = m_tag.get_tags(self.db)
    default_tag = tags[0] if len(tags) >= 1 else "python"
    logging.info("default tag:{}".format(default_tag))
    default_article = {
        "title": "",
        "abstracts": "",
        "content": "",
        "slug": "",
        "tag": default_tag,
        "date": utils.get_today_date(),
    }
    article_id = ""
    if slug is not None:
        # Editing an existing article
        default_article = m_article.get_article(self.db, slug)
        article_id = str(default_article["_id"])
    username = self.current_username
    self.render("backend/article_edit.html",
                username=username,
                article=default_article,
                tags=tags,
                article_id=article_id,
                )
def items_to_csv(links, sitename):
    if DEBUG:
        logger.debug("Printing to CSV")
    dirname = "output"
    # d = datetime.date.today().strftime("%Y-%m-%d_%H%M")
    d = get_today_date()
    # TODO add date to file
    filename = os.path.join(dirname, d + "_" + sitename.replace(" ", "") + ".csv")
    if DEBUG:
        logger.debug("Filename: " + filename)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    csv_columns = ['id', 'titolo', 'url', 'data', 'text', 'source']
    rows = []
    for data in links:
        # Skip links that are already present in the CSV.
        if find_items_in_csv(filename, data['id']):
            if DEBUG:
                logger.debug("Element with id %s already exists" % data['id'])
            continue
        txt = data['text'] if data['text'] else ''
        row = {
            'id': data['id'],
            'titolo': data['titolo'],
            'url': data['url'].rstrip(),
            'data': data['data'],
            'text': txt,
            'source': sitename
        }
        rows.append(row)
    try:
        with open(filename, 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns,
                                    delimiter=";", quoting=csv.QUOTE_ALL)
            for row in rows:
                try:
                    writer.writerow(row)
                except Exception:
                    logger.error("Exception in items_to_csv on %s" % filename)
    except IOError:
        logger.error("IOError in items_to_csv on %s" % filename)
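# items_to_csv deduplicates against rows already on disk via find_items_in_csv,
# which is not shown in this section. A minimal sketch of such a helper,
# assuming the 'id' value sits in the first column (as in csv_columns above)
# and the same ";" delimiter; an illustration, not the original code:
import csv
import os

def find_items_in_csv(filename, item_id):
    # Return True when a row with this id already exists in the CSV.
    if not os.path.exists(filename):
        return False
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=";")
        return any(row and row[0] == str(item_id) for row in reader)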
def get_items(self):
    super().get_items()
    today = get_today_date()
    headers = super().set_headers()
    results = requests.get(self.url.strip(), headers=headers)
    soup = BeautifulSoup(results.text, "html.parser")

    # Main feature article
    container = soup.find('article', class_='main-feature')
    title = container.find('h2')
    links = [{
        'id': generate_link_id(title.text),
        'titolo': title.text,
        'text': '',
        'url': title.find('a').get('href'),
        'data': today
    }]

    def get_links_from_structure(articles):
        links = []
        rows = articles.find_all('div', class_='row')
        for row in rows:
            a = row.find('a')
            if a:
                h3 = a.find('h3')
                if h3:
                    links.append({
                        'id': generate_link_id(h3.text),
                        'titolo': h3.text,
                        'text': '',
                        'url': a.get('href'),
                        'data': today
                    })
        return links

    # Card list: extend the main-feature link instead of overwriting it,
    # which discarded the entry built above.
    container = soup.find('div', class_='cards')
    links.extend(get_links_from_structure(container))
    self.links = links
def post(self):
    article_id = self.get_argument("id", "")
    title = self.get_argument("title", None)
    abstracts = self.get_argument("abstracts", "")
    content = self.get_argument("content", "")
    slug = self.get_argument("slug", None)
    tag = self.get_argument("tag", None)
    date = self.get_argument("date", None)
    status = self.get_argument("status", None)
    if title is None or title == "":
        return self.write({"err": True, "msg": u"Title cannot be empty"})
    if slug is None or slug == "":
        return self.write({"err": True, "msg": u"Slug cannot be empty"})
    is_unique_slug = m_article.is_unique_slug(self.db, slug, article_id)
    logging.info("is unique slug:{}".format(is_unique_slug))
    if not is_unique_slug:
        return self.write({"err": True, "msg": u"This slug already exists"})
    if tag is None or tag == "":
        return self.write({"err": True, "msg": u"Tag cannot be empty"})
    if status not in [STATUS_PUBLIC, STATUS_SAVE]:
        return self.write({"err": True, "msg": u"Invalid article status"})
    if date is None:
        date = utils.get_today_date()
    article = {
        "title": title,
        "abstracts": abstracts,
        "content": content,
        "slug": slug,
        "tag": tag,
        "date": date,
        "status": status,
    }
    result = m_article.update_article(self.db, article)
    if result:
        self.write({"err": False, "msg": u"Operation succeeded!"})
    else:
        self.write({"err": True, "msg": u"Operation failed! Please try again later"})
def run():
    start = time.time()
    today_date = get_today_date()
    CN_list = china_all_stock_list
    bs.login()

    # Domestic (China) stocks: daily data
    # get_china_stock_day_data(stock_list=CN_list,
    #                          start_date='2006-01-01',
    #                          end_date=today_date,
    #                          file_path=china_stock_day_file_path,
    #                          update=False)
    #
    # Domestic stocks: 15-minute data
    # get_china_stock_min__data(stock_list=CN_list,
    #                           start_date='2015-01-01',
    #                           end_date=today_date,
    #                           file_path=china_stock_min_file_path,
    #                           update=False)

    a = 0
    for i in CN_list:
        name = str(i) + 'day'
        # One thread per symbol for daily bars...
        thr = threading.Thread(target=get_china_stock_day_data,
                               args=(i, '2006-01-01', today_date,
                                     china_stock_day_file_path, False, "d"),
                               name=name)
        thr.start()
        # print("Thread: daily {}, code {}".format(threading.current_thread().name, i))
        # ...and one per symbol for 15-minute bars.
        thr_min = threading.Thread(target=get_china_stock_min__data,
                                   args=(i, '2015-01-01', today_date,
                                         china_stock_min_file_path, False, '15'),
                                   name=str(i) + 'min')
        thr_min.start()
        # Throttle: pause when too many threads are alive.
        if threading.active_count() > 16:
            time.sleep(5)
        print("Active threads: {}, code: {}".format(threading.active_count(), i))
        a += 1
    bs.logout()
    end = time.time()
    print('Total elapsed: %s' % (end - start))
def answer_question_mutiple_blank(self, element):
    """Multi-blank, horizontal fill-in, date, time, and upload questions."""
    # Geolocation question
    maps = element.find_elements_by_css_selector('.map_a')
    if maps:
        maps[0].click()
        time.sleep(1)
        return
    # File-upload question
    upload_files = element.find_elements_by_class_name('selectFile')
    if upload_files:
        for upload_file in upload_files:
            upload_file.send_keys(conf.UPLOAD_FILE_NAME)
        return
    # Multi-blank fill-in
    input_blanks = element.find_elements_by_css_selector('textarea')
    # Horizontal fill-in
    if not input_blanks:
        input_blanks = element.find_elements_by_css_selector('input')
    if input_blanks:
        for index, input_blank in enumerate(input_blanks):
            text = u'啦啦啦' + str(index + 1) * 3  # filler answer text
            input_blank.send_keys(text)
    # Date question
    zebra_datepicker = element.find_elements_by_css_selector('.Zebra_DatePicker_Icon')
    if zebra_datepicker:
        input_blank = element.find_element_by_css_selector('input')
        self.driver.execute_script('arguments[0].removeAttribute("readonly");', input_blank)
        today = utils.get_today_date()
        input_blank.send_keys(today)
    # Time question
    selects = element.find_elements_by_css_selector('select')
    if selects:
        for select in selects:
            options = select.find_elements_by_css_selector('option')
            options[9].click()
def put(self, id):
    """ Updates a measurement given its ID """
    measurement = Measurement.objects(id=id).first()
    if measurement is not None:
        # Only measurements created today may be updated.
        if get_formatted_date(get_today_date()) != get_formatted_date(measurement.created):
            self.abort_with_http_code_error(
                400,
                f'Cannot update a measurement for {get_formatted_date(measurement.created)}'
            )
        data = self.reqparse.parse_args()
        data = normalize_data(data)
        if not data:
            self.abort_with_http_code_error(400, 'Payload cannot be empty')
        measurement.update(**data)
        measurement.reload()
        return measurement.to_dict(), 200
    self.abort_with_http_code_error(404, f'Measurement ID={id} was not found')
def patch(self, id):
    try:
        measurement = Measurement.objects(id=id).first()
        if measurement is not None:
            if get_formatted_date(measurement.created) != get_formatted_date(get_today_date()):
                raise BadRequest(
                    f'Cannot update a measurement for {measurement.created}'
                )
            data = self.reqparse.parse_args()
            data = normalize_data(data)  # drop the None values
            measurement.update(**data)
            measurement.reload()
            return measurement.to_dict(), 200
        abort(404, message=f'Measurement ID={id} was not found')
    except BadRequest:
        raise
    except NotFound:
        raise
    except Exception as e:
        abort(500, message=str(e))
def organizations(request):
    """ To fetch and update the Organizations entity """
    print("\n-------Getting Organizations entities-------\n")
    END_POINT = 'searches/organizations'
    YESTERDAY_DATE = utils.get_yesterday_date()
    TODAY_DATE = utils.get_today_date()
    COLLECTION_NAME = 'organization_entities'
    QUERY = {
        "field_ids": [
            "acquirer_identifier", "aliases", "categories", "category_groups",
            "closed_on", "company_type", "contact_email", "created_at",
            "delisted_on", "demo_days", "description", "diversity_spotlights",
            "entity_def_id", "equity_funding_total", "exited_on", "facebook",
            "facet_ids", "founded_on", "founder_identifiers", "funding_stage",
            "funding_total", "funds_total", "hub_tags", "identifier",
            "image_id", "image_url", "investor_identifiers", "investor_stage",
            "investor_type", "ipo_status", "last_equity_funding_total",
            "last_equity_funding_type", "last_funding_at", "last_funding_total",
            "last_funding_type", "layout_id", "legal_name", "linkedin",
            "listed_stock_symbol", "location_group_identifiers",
            "location_identifiers", "name", "num_acquisitions", "num_alumni",
            "num_articles", "num_current_advisor_positions",
            "num_current_positions", "num_diversity_spotlight_investments",
            "num_employees_enum", "num_enrollments", "num_event_appearances",
            "num_exits", "num_exits_ipo", "num_founder_alumni", "num_founders",
            "num_funding_rounds", "num_funds", "num_investments",
            "num_investors", "num_lead_investments", "num_lead_investors",
            "num_past_positions", "num_portfolio_organizations",
            "num_sub_organizations", "operating_status", "override_layout_id",
            "owner_identifier", "permalink", "permalink_aliases",
            "phone_number", "program_application_deadline", "program_duration",
            "program_type", "rank_delta_d30", "rank_delta_d7",
            "rank_delta_d90", "rank_org", "rank_principal", "revenue_range",
            "school_method", "school_program", "school_type",
            "short_description", "status", "stock_exchange_symbol",
            "stock_symbol", "twitter", "updated_at", "uuid", "valuation",
            "valuation_date", "website", "website_url", "went_public_on"
        ],
        "order": [{"field_id": "rank_org", "sort": "asc"}],
        "query": [
            {
                "type": "predicate",
                "field_id": "updated_at",
                "operator_id": "gte",
                "values": [str(YESTERDAY_DATE)]
            },
        ],
        "limit": 1000
    }

    total_count, entities = utils.fetch_data(QUERY, END_POINT)
    if total_count is None:
        return "Error in parsing the API response. Please check the logs."
    print("total count: ", total_count)

    # get the organization collection
    org_col = utils.get_mongodb_collection(COLLECTION_NAME)

    fetch_records_count = 0
    # storing into the database and pagination
    while fetch_records_count < total_count:
        if fetch_records_count != 0:
            _, entities = utils.fetch_data(QUERY, END_POINT)
        if not entities:
            print("no entities left i.e., entities = %s. moving on." % len(entities))
            break
        for e in entities:
            if e:
                e['insert_date'] = TODAY_DATE
            else:
                print("Entity is empty: ", e)
        inserted = org_col.insert_many(entities)
        fetch_records_count += len(entities)
        print("inserted records: ")
        pprint(inserted.inserted_ids)
        print("total_count: ", total_count, ", fetched records: ", fetch_records_count)
        print("------------------------")
        # get the last record to paginate after
        after_id = entities[-1].get('uuid', None)
        if after_id:
            print("Get next batch after id: ", after_id)
            QUERY['after_id'] = after_id
        entities.clear()
    msg = {
        'entity': 'Organization',
        'total_record_updated': fetch_records_count
    }
    return jsonify(msg)
def press_references(request):
    """ To fetch and update the Press References entity. """
    print("\n-------Getting Press References entities-------\n")
    COLLECTION_NAME = 'press_reference_entities'
    END_POINT = 'searches/press_references'
    TODAY_DATE = utils.get_today_date()
    YESTERDAY_DATE = utils.get_yesterday_date()
    QUERY = {
        "field_ids": [
            "activity_entities", "author", "created_at", "entity_def_id",
            "identifier", "posted_on", "publisher", "thumbnail_url", "title",
            "updated_at", "url", "uuid"
        ],
        "query": [
            {
                "type": "predicate",
                "field_id": "updated_at",
                "operator_id": "gte",
                "values": [str(YESTERDAY_DATE)]
            },
        ],
        "order": [{"field_id": "updated_at", "sort": "asc", "nulls": "last"}],
        "limit": 1000,
    }

    total_count, entities = utils.fetch_data(QUERY, END_POINT)
    if total_count is None:
        return "Error in parsing the API response. Please check the logs."
    print("total count: ", total_count)

    # get the press_references collection
    col = utils.get_mongodb_collection(COLLECTION_NAME)

    fetch_records_count = 0
    # storing into the database and pagination
    while fetch_records_count < total_count:
        if fetch_records_count != 0:
            _, entities = utils.fetch_data(QUERY, END_POINT)
        if not entities:
            print("no entities left i.e., entities = %s. moving on." % len(entities))
            break
        for e in entities:
            if e:
                e['insert_date'] = TODAY_DATE
            else:
                print("Entity is empty: ", e)
        inserted = col.insert_many(entities)
        fetch_records_count += len(entities)
        print("inserted records: ")
        pprint(inserted.inserted_ids)
        print("total_count: ", total_count, ", fetched records: ", fetch_records_count)
        print("------------------------")
        # get the last record to paginate after
        after_id = entities[-1].get('uuid', None)
        if after_id:
            print("Get next batch after id: ", after_id)
            QUERY['after_id'] = after_id
        entities.clear()
    msg = {
        'entity': 'press_references',
        'total_record_updated': fetch_records_count
    }
    return jsonify(msg)
def acquisitions(request):
    """ To fetch and update the Acquisitions entity. """
    print("\n-------Getting Acquisitions entities-------\n")
    COLLECTION_NAME = 'acquisitions_entities'
    END_POINT = 'searches/acquisitions'
    TODAY_DATE = utils.get_today_date()
    YESTERDAY_DATE = utils.get_yesterday_date()
    QUERY = {
        "field_ids": [
            "acquiree_categories", "acquiree_funding_total",
            "acquiree_identifier", "acquiree_last_funding_type",
            "acquiree_locations", "acquiree_num_funding_rounds",
            "acquiree_revenue_range", "acquiree_short_description",
            "acquirer_categories", "acquirer_funding_stage",
            "acquirer_funding_total", "acquirer_identifier",
            "acquirer_locations", "acquirer_num_funding_rounds",
            "acquirer_revenue_range", "acquirer_short_description",
            "acquisition_type", "announced_on", "completed_on", "created_at",
            "disposition_of_acquired", "entity_def_id", "identifier",
            "permalink", "price", "rank_acquisition", "short_description",
            "status", "terms", "updated_at", "uuid"
        ],
        "query": [
            {
                "type": "predicate",
                "field_id": "updated_at",
                "operator_id": "gte",
                "values": [str(YESTERDAY_DATE)]
            },
        ],
        "order": [{"field_id": "updated_at", "sort": "asc", "nulls": "last"}],
        "limit": 1000,
    }

    total_count, entities = utils.fetch_data(QUERY, END_POINT)
    if total_count is None:
        return "Error in parsing the API response. Please check the logs."
    print("total count: ", total_count)

    # get the acquisitions collection
    col = utils.get_mongodb_collection(COLLECTION_NAME)

    fetch_records_count = 0
    # storing into the database and pagination
    while fetch_records_count < total_count:
        if fetch_records_count != 0:
            _, entities = utils.fetch_data(QUERY, END_POINT)
        if not entities:
            print("no entities left i.e., entities = %s. moving on." % len(entities))
            break
        for e in entities:
            if e:
                e['insert_date'] = TODAY_DATE
            else:
                print("Entity is empty: ", e)
        inserted = col.insert_many(entities)
        fetch_records_count += len(entities)
        print("inserted records: ")
        pprint(inserted.inserted_ids)
        print("total_count: ", total_count, ", fetched records: ", fetch_records_count)
        print("------------------------")
        # get the last record to paginate after
        after_id = entities[-1].get('uuid', None)
        if after_id:
            print("Get next batch after id: ", after_id)
            QUERY['after_id'] = after_id
        entities.clear()
    msg = {
        'entity': 'acquisitions',
        'total_record_updated': fetch_records_count
    }
    return jsonify(msg)
def funding_rounds(request):
    """ To fetch and update the Funding Rounds entity. """
    print("\n-------Getting Funding Rounds entities-------\n")
    COLLECTION_NAME = 'funding_rounds_entities'
    END_POINT = 'searches/funding_rounds'
    TODAY_DATE = utils.get_today_date()
    YESTERDAY_DATE = utils.get_yesterday_date()
    QUERY = {
        "field_ids": [
            "announced_on", "closed_on", "created_at", "entity_def_id",
            "funded_organization_categories",
            "funded_organization_description",
            "funded_organization_diversity_spotlights",
            "funded_organization_funding_stage",
            "funded_organization_funding_total",
            "funded_organization_identifier", "funded_organization_location",
            "funded_organization_revenue_range", "identifier", "image_id",
            "investment_stage", "investment_type", "investor_identifiers",
            "is_equity", "lead_investor_identifiers", "money_raised", "name",
            "num_investors", "num_partners", "permalink",
            "post_money_valuation", "pre_money_valuation",
            "rank_funding_round", "short_description", "target_money_raised",
            "updated_at", "uuid",
        ],
        "query": [
            {
                "type": "predicate",
                "field_id": "updated_at",
                "operator_id": "gte",
                "values": [str(YESTERDAY_DATE)]
            },
        ],
        "order": [{"field_id": "updated_at", "sort": "asc", "nulls": "last"}],
        "limit": 1000,
    }

    total_count, entities = utils.fetch_data(QUERY, END_POINT)
    # TODO: add this check to all of the functions
    if total_count is None:
        return "Error in parsing the API response. Please check the logs."
    print("total count: ", total_count)

    # get the funding_rounds collection
    col = utils.get_mongodb_collection(COLLECTION_NAME)

    fetch_records_count = 0
    # storing into the database and pagination
    while fetch_records_count < total_count:
        if fetch_records_count != 0:
            _, entities = utils.fetch_data(QUERY, END_POINT)
        if not entities:
            print("no entities left i.e., entities = %s. moving on." % len(entities))
            break
        for e in entities:
            if e:
                e['insert_date'] = TODAY_DATE
            else:
                print("Entity is empty: ", e)
        inserted = col.insert_many(entities)
        fetch_records_count += len(entities)
        print("inserted records: ")
        pprint(inserted.inserted_ids)
        print("total_count: ", total_count, ", fetched records: ", fetch_records_count)
        print("------------------------")
        # get the last record to paginate after
        after_id = entities[-1].get('uuid', None)
        if after_id:
            print("Get next batch after id: ", after_id)
            QUERY['after_id'] = after_id
        entities.clear()
    msg = {
        'entity': 'funding_rounds',
        'total_record_updated': fetch_records_count
    }
    return jsonify(msg)
def people(request):
    """ To fetch and update the People entity. """
    print("\n-------Getting people entities-------\n")
    END_POINT = 'searches/people'
    COLLECTION_NAME = 'people_entities'
    YESTERDAY_DATE = utils.get_yesterday_date()
    TODAY_DATE = utils.get_today_date()
    QUERY = {
        "field_ids": [
            "aliases", "born_on", "created_at", "description", "died_on",
            "entity_def_id", "facebook", "facet_ids", "first_name", "gender",
            "identifier", "image_id", "image_url", "investor_stage",
            "investor_type", "last_name", "layout_id", "linkedin",
            "location_group_identifiers", "location_identifiers",
            "middle_name", "name", "num_articles",
            "num_current_advisor_jobs", "num_current_jobs",
            "num_diversity_spotlight_investments", "num_event_appearances",
            "num_exits", "num_exits_ipo", "num_founded_organizations",
            "num_investments", "num_jobs", "num_lead_investments",
            "num_partner_investments", "num_past_advisor_jobs",
            "num_past_jobs", "num_portfolio_organizations",
            "override_layout_id", "permalink", "permalink_aliases",
            "primary_job_title", "primary_organization", "rank_delta_d30",
            "rank_delta_d7", "rank_delta_d90", "rank_person",
            "rank_principal", "short_description", "twitter", "updated_at",
            "uuid", "website", "website_url",
        ],
        "query": [
            {
                "type": "predicate",
                "field_id": "updated_at",
                "operator_id": "gte",
                "values": [str(YESTERDAY_DATE)]
            },
        ],
        "order": [{"field_id": "rank_person", "sort": "asc", "nulls": "last"}],
        "limit": 1000,
    }

    total_count, entities = utils.fetch_data(QUERY, END_POINT)
    if total_count is None:
        return "Error in parsing the API response. Please check the logs."
    print("total count: ", total_count)

    # get the people collection
    col = utils.get_mongodb_collection(COLLECTION_NAME)

    fetch_records_count = 0
    # storing into the database and pagination
    while fetch_records_count < total_count:
        if fetch_records_count != 0:
            _, entities = utils.fetch_data(QUERY, END_POINT)
        if not entities:
            print("no entities left i.e., entities = %s. moving on." % len(entities))
            break
        for e in entities:
            if e:
                e['insert_date'] = TODAY_DATE
            else:
                print("Entity is empty: ", e)
        inserted = col.insert_many(entities)
        fetch_records_count += len(entities)
        print("inserted records: ")
        pprint(inserted.inserted_ids)
        print("total_count: ", total_count, ", fetched records: ", fetch_records_count)
        print("------------------------")
        # get the last record to paginate after
        after_id = entities[-1].get('uuid', None)
        if after_id:
            print("Get next batch after id: ", after_id)
            QUERY['after_id'] = after_id
        entities.clear()
    msg = {'entity': 'People', 'total_record_updated': fetch_records_count}
    return jsonify(msg)
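# The five fetchers above repeat the same fetch/insert/paginate loop and differ
# only in endpoint, collection name, field list, and sort order. A sketch of how
# that shared loop could be factored into one helper; sync_entities is a
# hypothetical name, not part of the original code:
def sync_entities(query, end_point, collection_name, today_date):
    # Fetch a page, stamp each record with insert_date, store the batch,
    # then advance the cursor using the last record's uuid.
    total_count, entities = utils.fetch_data(query, end_point)
    if total_count is None:
        return None
    col = utils.get_mongodb_collection(collection_name)
    fetched = 0
    while fetched < total_count:
        if fetched != 0:
            _, entities = utils.fetch_data(query, end_point)
        if not entities:
            break
        for e in entities:
            if e:
                e['insert_date'] = today_date
        col.insert_many(entities)
        fetched += len(entities)
        after_id = entities[-1].get('uuid')
        if after_id:
            query['after_id'] = after_id
        entities.clear()
    return fetched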
from settings import (
    china_all_stock_list,
    china_furture_min_file_path,
    china_furture_day_file_path,
    china_stock_min_file_path,
    china_stock_day_file_path,
    China_fu_list,
    UsStock_list,
    us_stock_min_file_path,
    us_stock_day_file_path
)

if __name__ == '__main__':
    today_date = get_today_date()
    # Domestic (China) stocks
    lg = bs.login()
    baostock = GetDataFromBaoStock(3, today_date, china_all_stock_list)
    # Update domestic daily data
    baostock.get_china_stock_day_data(frequency='d', start_date=today_date,
                                      end_date=today_date, update=True,
                                      file_path=china_stock_day_file_path)
    # Update domestic 15-minute data
    baostock.get_china_stock_min__data(frequency='15', start_date=today_date,
                                       end_date=today_date,
                                       file_path=china_stock_min_file_path,
                                       update=True)
    bs.logout()
    # Domestic futures
    sina_ = GetDataFromSina(China_fu_list)
    sina_.get_cn_furture_day(china_furture_day_file_path)
    sina_.get_cn_furture_min(china_furture_min_file_path)