def __init__(self):
    """Load settings, log in to GitHub, start the listener and repo manager.

    Reads ``config/settings.yml``, creates the GitHub client from the
    ``github`` section, builds the web ``Application`` and binds it to the
    port given under ``networking.port``, then records the tracked repos
    and constructs the ``Repos`` helper.

    A failed login is logged (with traceback) but does not abort start-up.
    """
    self.log = getLogger("Manager")
    self.log.info("Loading..")

    # Use a context manager so the settings file handle is closed promptly
    # instead of being left for the garbage collector.
    # NOTE(review): yaml.load() without an explicit Loader can construct
    # arbitrary Python objects; consider yaml.safe_load() if this file is
    # ever not fully trusted.
    with open("config/settings.yml") as settings_file:
        config = yaml.load(settings_file)

    gh = config["github"]
    self.github = Github(gh["username"], gh["password"])

    try:
        self.log.info("Logged in as '%s'" % self.github.get_user().name)
    except Exception:
        # log.exception() attaches the traceback itself. The former message
        # carried a "%s" placeholder with no argument, which was emitted
        # literally.
        self.log.exception("Unable to login")

    self.app = Application(
        [],  # Routes
        log_function=self.log_request
    )

    self.port = reactor.listenTCP(
        config["networking"]["port"], self.app
    )

    self.repos = config["repos"]

    # Same output as before: "repos" for more than one entry, else "repo".
    noun = "repos" if len(self.repos) > 1 else "repo"
    self.log.info("Tracking %s: %s" % (noun, ", ".join(self.repos.keys())))

    self.ready = True
    self.repo_manager = Repos(self)
def __init__(self, manager):
    """
    Look up each tracked repository on GitHub and cache the result.

    Every key of ``self.repos`` is passed to ``manager.github.get_repo``;
    found repositories are stored in ``self.repo_objects`` under the same
    key, unknown ones are logged as a warning. ``do_init()`` is called at
    the end.

    :type manager: system.manager.Manager
    """
    self.manager = manager
    self.log = getLogger("Repos")

    # NOTE(review): self.repos and self.repo_objects are not assigned in
    # this method; presumably they are provided at class level -- confirm.
    for repo_name in self.repos.keys():
        try:
            repo_obj = self.manager.github.get_repo(repo_name)
            self.log.info("Got repo: %s" % repo_obj.name)
            self.repo_objects[repo_name] = repo_obj
        except UnknownObjectException:
            self.log.warn("Unknown repo: %s" % repo_name)

    self.do_init()
import pprint
import simplekml
import math
import csv
import ast
import os
import sys
import random
import pymongo
# Fixed: this line previously read "rom util.config import Config".
from util.config import Config
from util import log
import subprocess
from util import lat_lng
import re

logger = log.getLogger(__name__)

# MongoDB Client & DB
# Connection URI, database name and collection names all come from the
# "mongo" section of the project configuration.
cfg = Config()
# Only the `pymongo` module itself is imported here, so the client class is
# referenced through it rather than as a bare `MongoClient` name.
client = pymongo.MongoClient(cfg.get("mongo", "uri"))
db = client[cfg.get("mongo", "db_strava")]
segments_collection = db[cfg.get("mongo", "coll_segment")]
leaderboard_collection = db[cfg.get("mongo", "coll_leaderboards")]
zip_data_collection = db[cfg.get("mongo", "coll_zip")]
wban_collection = db[cfg.get("mongo", "coll_wban")]
weather_collection = db[cfg.get("mongo", "coll_weather")]

# Date and time formats
wban_date_format = cfg.get("weather", "date_format")
strava_datetime_format = cfg.get("strava", "date_time_format")
#-------------------------------------------------------------------------------
# Module set-up: imports, logger, MongoDB handles and date/time format strings.
import sys
import os
from report.google_polyline_encoder import decode
import simplekml
import datetime as dt
import csv
import pprint
import re
import pandas
from pymongo import MongoClient, GEOSPHERE
from util import log
from util.config import Config
from bson.code import Code
from bson.son import SON

logger = log.getLogger(__name__)

# MongoDB Client & DB
cfg = Config()
client = MongoClient(cfg.get("mongo", "uri"))
db = client[cfg.get("mongo", "db_strava")]

# Collection handles; the collection names are read from the "mongo" section.
segments_collection = db[cfg.get("mongo", "coll_segment")]
leaderboard_collection = db[cfg.get("mongo", "coll_leaderboards")]
zip_data_collection = db[cfg.get("mongo", "coll_zip")]
weather_collection = db[cfg.get("mongo", "coll_weather")]
wban_collection = db[cfg.get("mongo", "coll_wban")]

# Date/time format strings read from the "weather" and "strava" sections.
wban_date_format = cfg.get("weather", "date_format")
wban_time_format = cfg.get("weather", "time_format")
strava_datetime_format = cfg.get("strava", "date_time_format")
from config import config
from os import path
import pandas as pd
from openpyxl import load_workbook
from util import log

logging = log.getLogger('storage.log', log.INFO)


class Storage:
    """Writes collected data for one shop into ``<config.output>/<shop_name>.xlsx``."""

    def __init__(self, shop_name):
        """Remember the shop name and derive the output workbook path from it."""
        self.shop_name = shop_name
        self.output = path.join(config.output, shop_name + '.xlsx')
        return

    def store(self, data, collect_name, distance):
        # Only the beginning of this method is visible in this snippet; the
        # comments below describe just the visible part.
        try:
            logging.info('准备存储: %s' % (self.output))
            writer = ''
            # Only operate on the target sheet, to avoid overwriting other sheets
            if path.exists(self.output):
                # Re-attach the existing workbook and its sheets to the writer.
                book = load_workbook(self.output)
                writer = pd.ExcelWriter(self.output, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pd.ExcelWriter(self.output, engine='openpyxl')
            # Sheet name is the distance followed by the label that
            # config.content_zh_CN maps collect_name to.
            sheet_name = str(distance) + str(
                config.content_zh_CN[collect_name])
# -*-coding:utf-8-*-
from urllib.parse import urlencode
import requests
import time
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from util import log
import platform

logging = log.getLogger('api.log')


# Baidu Maps API
class Handler:
    """Holds the API key and the base query parameters for Baidu Maps requests."""

    # Maximum number of results per single request
    page_size = 20
    # Maximum number of errors per single run
    error_max = 10

    def __init__(self, p_key):
        """Constructor; takes the secret API key."""
        # Base parameters; 'ak' carries the key passed in.
        self.raw_map = {
            'ak': p_key,
            'output': 'json',
            'scope': '2',
            'page_size': self.page_size,
        }
        self.key = p_key
# -*-coding:utf-8-*-
# chdir must be the first import so that it can change the environment
import chdir
from spider import controllor
from baidu.api import ApiError
import pandas as pd
import numpy as np
from config import config
from util import log
from util import readkey
from util import pid
import time
import sys
import platform

logging = log.getLogger('deamon.log', log.INFO)


class Deamon:
    def __init__(self):
        """Report the platform, load key and file settings, and build the controller."""
        # Print the comma-joined result of platform.architecture().
        arch_description = ','.join(platform.architecture())
        self.s_print('当前环境: %s\n' % arch_description)
        logging.info('Deamon初始化')

        self.complete_success_num = 0

        # Day of the month (local time) at construction.
        now = time.localtime()
        self.localday = int(time.strftime('%d', now))

        self.ak_key = readkey.get_key()
        self.shop_filename = config.shop_filename
        self.complete_filename = config.complete_filename
        self._read_list()

        self.main_controller = controllor.Controller(self.ak_key)
import zmq
import threading
import json
import time
from collections import defaultdict
from util.log import getLogger

moduleLogger = getLogger(__name__)


class HandlerNotFound(Exception):
    """Error type; presumably signals a missing handler -- confirm at raise sites."""


class InvalidArguements(Exception):
    """Error type; presumably signals bad call arguments -- confirm at raise sites."""


class RPCError(Exception):
    """Error type; presumably a general RPC failure -- confirm at raise sites."""


class Message(object):
    """A dictionary payload that is filled in one option at a time."""

    def __init__(self):
        self._payload = dict()

    @property
    def payload(self):
        """The payload dictionary itself (not a copy)."""
        return self._payload

    def setOption(self, key, value):
        """Store *value* under *key* in the payload, replacing any previous value."""
        self._payload.update({key: value})
# Put the parent of this file's directory on sys.path so the imports below
# (config, util) can be resolved from there.
curdir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(curdir, os.pardir))

# On Python 2 only: force the default string encoding to UTF-8.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding("utf-8")
    # raise "Must be using Python 3"

import numpy as np
from config import W2V_CONFIG
from util import log
from gensim.models import keyedvectors

# logger
logger = log.getLogger(__file__)


def load_model(model_file=W2V_CONFIG["model"], binary=False):
    '''
    Load model with C format word2vec file.

    :param model_file: path of the word2vec file; the default is
        W2V_CONFIG["model"], read once when this function is defined.
    :param binary: forwarded to ``load_word2vec_format`` as ``binary``.
    :return: the object returned by
        ``keyedvectors.KeyedVectors.load_word2vec_format``.
    :raises Exception: if ``model_file`` does not exist on disk.
    '''
    if not os.path.exists(model_file):
        raise Exception("Model file does not exist.")
    return keyedvectors.KeyedVectors.load_word2vec_format(
        model_file, binary=binary, unicode_errors='ignore')


def resolve_embedding_size(text_format_w2v_model):
    '''
    Get word2vec embedding size with text format model
# -*-coding:utf-8-*- from config import config from spider import collector from spider import storage from util import log logging = log.getLogger('controller.log', log.INFO) class Controller: def __init__(self, ak_key): # 读取配置 self.content = config.content self.collector = collector.Collector(ak_key) pass def run(self, shop_name, real_name, shop_location, save_handler, filter=[]): # 成功爬取的结果 successful_result = filter # 构造存储器 self.storage = storage.Storage(shop_name) for key in self.content: try: collect_name = key # 如果当前待爬内容在filter过滤器中, 跳过 if collect_name in filter: continue # 遍历所需内容 logging.info('开始爬取 店名: %s, 待爬内容: %s' % (shop_name, key))
# -*-coding:utf-8-*- import baidu.api as api import pandas as pd from config import config from util import log logging = log.getLogger('collector.log', log.INFO) # 安全的map取值 def s_get(map, key): if key in map: return map[key] else: return '' class Collector: def __init__(self, ak_key): # 初始化 self.map_handler = api.Handler(ak_key) def run(self, collect_name, shop_name, shop_location, distance): # 运行某个收集器 if collect_name == 'house': return self.collect_house(shop_name, shop_location, distance) elif collect_name == 'hotel': return self.collect_hotel(shop_name, shop_location, distance) elif collect_name == 'metro': return self.collect_metro(shop_name, shop_location, distance) elif collect_name == 'bus':
from util.config import config
from util.log import getLogger
from pubsub import Publisher
from slack import SlackClient
import time
from queue import Queue
from queue import Empty as QueueEmpty
from threading import Thread, Event
import re
import json

_log = getLogger('slack_reader')


class Reader(Thread):
    """Daemon thread wrapping a ``SlackClient``; events are queued on ``_output``."""

    def __init__(self, token):
        """Create the client and internal state, then look up this client's id/user."""
        self._client = SlackClient(token)  # create slack client
        self._output = Queue()  # events are put here by _handle_event
        self._exit = Event()
        self._read_int = 1  # presumably a read interval -- TODO confirm unit and use
        super(Reader, self).__init__()
        self.daemon = True  # die on process exit
        self._log = _log.getChild('reader')
        # _retrieve_id() is expected to return a two-element sequence.
        self._id, self._user, = self._retrieve_id()
        self._channel_cache = {}

    def _handle_event(self, event):
        """Log the event's ``'type'`` at debug level and enqueue the event."""
        self._log.debug('got event type: %s' % event['type'])
        self._output.put(event)

    # The body of _retrieve_id is not visible in this snippet.
    def _retrieve_id(self):
import websocket
import requests
from util.config import config
from util.log import getLogger
from queue import Queue, Empty
from threading import Event, Thread
import json

_log = getLogger('slack')


def build_url(method):
    """Return the Slack API URL for *method* (base URL plus the method name)."""
    return 'https://slack.com/api/' + method


def reqOk(resp):
    """Return True when the response has a truthy JSON body with a truthy 'ok' entry."""
    return bool(resp.json() and resp.json().get('ok'))


def call_method(token, method, **kwargs):
    """POST *kwargs* plus the token to *method*.

    Returns a ``(ok, body)`` tuple: ``ok`` is the result of ``reqOk`` and
    ``body`` is the decoded JSON response.
    """
    form_data = dict(kwargs, token=token)
    resp = requests.post(build_url(method), data=form_data)
    ok = reqOk(resp)
    body = resp.json()
    return ok, body


class WSocket(Thread):
    def __init__(self, ws, connected):
        """Keep references to the websocket object and the connected flag/event."""
        # NOTE(review): Thread.__init__ is not called in the visible code --
        # confirm it happens elsewhere.
        self._ws = ws
        self._connected = connected