def __init__(self):
    """Create the handle to the ``proxys`` collection of the ``proxy`` database."""
    # Database setup: client wrapper -> database wrapper -> collection wrapper.
    client = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
    proxy_db = MonDatabase(mongodb=client, database_name='proxy')
    self._collection = MonCollection(database=proxy_db,
                                     collection_name='proxys')
def __init__(self):
    """Set up the ``statsgov`` database handle and the variable-query settings."""
    # Database setup.
    client = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
    self._mdb = MonDatabase(mongodb=client, database_name='statsgov')

    # Variable query website: endpoint URL plus the query parameters stored
    # alongside it.
    self._var_query_web = 'http://data.stats.gov.cn/adv.htm'
    self._var_query_web_params = dict(m='findZbXl', wd='zb')

    # Label -> short code. The labels read "annual national" and
    # "annual regional"; presumably the codes are the site's database
    # codes -- TODO confirm against the callers.
    self._tags = {
        '年度全国': 'hgnd',
        '年度地区': 'fsnd',
    }
def __init__(self):
    """Set up the proxy manager, the ``statsgov`` database handle and the query URL template."""
    # Proxy server manager.
    self._proxy_manager = ProxyManager()

    # Database setup.
    client = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
    self._mdb = MonDatabase(mongodb=client, database_name='statsgov')

    # Label -> short code. The labels read "annual national" and
    # "annual regional".
    self._tags = {
        '年度全国': 'hgnd',
        '年度地区': 'fsnd',
    }

    # Query URL with six positional ``{}`` placeholders, in order:
    # m, dbcode, rowcode, colcode, wds, dfwds.
    self._stats_gov_url_template = (
        'http://data.stats.gov.cn/easyquery.htm'
        '?m={}&dbcode={}&rowcode={}&colcode={}'
        '&wds={}&dfwds={}&k1=14930450350'
    )
def __init__(self, website=None, label=None):
    """Store the target website and label, and set up the proxy manager and storage.

    :param website: address of the website; stored unchanged on ``self.website``.
    :param label: marker value; stored unchanged on ``self.label``.
    """
    self.pmanager = ProxyManager()

    # Website address.
    self.website = website

    # Database setup: the ``scraper`` collection of the ``local`` database.
    client = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
    local_db = MonDatabase(mongodb=client, database_name='local')
    self.db = MonCollection(database=local_db, collection_name='scraper')

    # Label / marker.
    self.label = label
def __init__(self):
    """Set up the proxy collection handle, the validation websites and the result store."""
    # Database setup: the ``proxy`` collection of the ``proxy`` database.
    client = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
    proxy_db = MonDatabase(mongodb=client, database_name='proxy')
    self._collection = MonCollection(database=proxy_db,
                                     collection_name='proxy')

    # Websites used for validation, as (address, title) pairs.
    site_pairs = (
        ('http://www.163.com', '网易'),
        ('http://www.sina.com.cn', '新浪首页'),
        ('https://www.douban.com/', '豆瓣'),
        ('http://www.sohu.com/', '搜狐'),
        ('http://www.eastday.com/', '东方网'),
        ('http://www.shanghaiairport.com/', '上海机场(集团)有限公司'),
    )
    self._checked_websites = [
        {'address': address, 'title': title}
        for address, title in site_pairs
    ]

    # Container for the proxy servers that have been checked.
    self._checked_proxy_list = {}
def insert_to_db(self, literatures=None, database='papers', collection='cnki', condition=None):
    """Insert literature records into MongoDB, or refresh their counters.

    A record is identified by the combination of its ``title``,
    ``journal``, ``year``, ``issue`` and ``pages`` values. A record that
    has no match in the collection is inserted as-is; for a record that
    already exists only the ``cite`` and ``download`` fields are updated.
    Records without an ``author`` value are skipped.

    :param literatures: iterable of dict records; ``None`` means nothing
        to do.
    :param database: name of the target database.
    :param collection: name of the target collection.
    :param condition: optional ``(field, expected_value)`` pair; a record
        whose ``field`` value differs from ``expected_value`` is skipped.
    :return: None
    """
    # The default argument is None, which cannot be iterated.
    if literatures is None:
        return

    mongo = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
    mdb = MonDatabase(mongodb=mongo, database_name=database)
    conn = MonCollection(database=mdb, collection_name=collection)

    identity_fields = ('title', 'journal', 'year', 'issue', 'pages')

    for record in literatures:
        # Built once and reused for both the lookup and the update filter.
        # ``.get`` keeps the progress print from raising KeyError on a
        # record that lacks one of the identity fields.
        identity = {field: record.get(field) for field in identity_fields}
        print(*identity.values())

        if record.get('author') is None:
            print('No author!')
            continue

        if condition is not None:
            field, expected = condition[0], condition[1]
            if expected != record.get(field):
                print('Journal not matched!->', expected, record.get(field))
                continue

        if conn.collection.find_one(identity) is None:
            print('Insert...!')
            conn.collection.insert_one(record)
        else:
            print('Update...!')
            conn.collection.update_one(
                identity,
                {'$set': {'cite': record.get('cite'),
                          'download': record.get('download')}})
# coding=UTF-8

# ============================
# @app: check whether a direct train runs between two stations
# @author: glen
# @date: 2017.1.8
# ============================

import pickle
from libs.database.class_mongodb import MongoDB, MonDatabase, MonCollection
from libs.application.train.class_trainscraper import TrainStationScraper, TrainTicketLeftScraper
from libs.application.train.class_trainscraper import StationPairsGenerator, StationPairValidator

# 0. Initialization
train_db = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
train_station_collection = MonCollection(
    database=MonDatabase(mongodb=train_db, database_name='train'),
    collection_name='stations')

day = '2017-01-10'
DOWNLOAD = False
LOAD = True
FILE_NAME = 'station_pairs.pkl'

# 1. Scrape the station names and cache every station pair in FILE_NAME
if DOWNLOAD:
    # Scrape before opening the file: opening in 'wb' mode truncates it, so
    # a failed scrape would otherwise wipe a previously cached result.
    Stations = TrainStationScraper().scrape()
    All_Station_Pairs = list(StationPairsGenerator(stations=Stations)())
    # The context manager closes the file even if pickling raises.
    with open(FILE_NAME, 'wb') as F:
        pickle.dump(All_Station_Pairs, F)
# coding = UTF-8

import pandas as pd
from libs.database.class_mongodb import MongoDB, MonDatabase, MonCollection

# Database: the ``admindivision`` collection of the ``region`` database
mongo = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
mdb = MonDatabase(mongodb=mongo, database_name='region')
mcollection = MonCollection(database=mdb, collection_name='admindivision')


def find_code(region):
    """Return the ``acode`` stored for ``region`` in the year-2010 records.

    :param region: value matched against the ``region`` field.
    :return: the ``acode`` of the single matching document, or ``None``
        when no document matches.
    :raises Exception: when more than one document matches, since the
        lookup is then ambiguous.
    """
    found = list(mcollection.collection.find(
        {'year': '2010', 'region': region},
        projection={'_id': False, 'acode': True}))

    if not found:
        return None
    if len(found) > 1:
        print(found)
        raise Exception(
            'Ambiguous region {!r}: {} documents matched for year 2010'.format(
                region, len(found)))
    return found[0]['acode']


# Load