Exemple #1
0
    def __init__(self):
        """Load the settings file, log in to GitHub and start listening.

        Reads ``config/settings.yml``, builds the GitHub client from its
        ``github`` section, creates the application, starts listening on the
        port given by ``networking.port`` and finally creates the ``Repos``
        manager for the configured ``repos``.

        A failure while looking up the logged-in GitHub user is logged (with
        traceback) but does not abort start-up.
        """
        self.log = getLogger("Manager")
        self.log.info("Loading..")

        # The context manager closes the settings file deterministically.
        # safe_load is used instead of yaml.load: a settings file only needs
        # plain YAML types, and yaml.load without an explicit Loader is
        # unsafe (and rejected by recent PyYAML releases).
        with open("config/settings.yml") as settings_file:
            config = yaml.safe_load(settings_file)

        gh = config["github"]
        self.github = Github(gh["username"], gh["password"])

        try:
            self.log.info("Logged in as '%s'" % self.github.get_user().name)
        except Exception:
            # exception() appends the traceback itself; the message must not
            # carry an unfilled "%s" placeholder.
            self.log.exception("Unable to login")

        self.app = Application(
            [],  # Routes
            log_function=self.log_request
        )

        self.port = reactor.listenTCP(
            config["networking"]["port"], self.app
        )

        self.repos = config["repos"]
        # Only the label differs between the singular and plural message.
        label = "Tracking repos" if len(self.repos) > 1 else "Tracking repo"
        self.log.info("%s: %s" % (label, ", ".join(self.repos.keys())))

        self.ready = True

        self.repo_manager = Repos(self)
Exemple #2
0
    def __init__(self, manager):
        """
        Look up every tracked repository on GitHub and keep the results.

        Each key of ``self.repos`` is resolved through the manager's GitHub
        client; repositories that GitHub does not know are logged and skipped.
        ``self.do_init()`` is called once all names have been processed.

        :type manager: system.manager.Manager
        """
        self.manager = manager
        self.log = getLogger("Repos")

        # NOTE(review): self.repos and self.repo_objects are not assigned in
        # this method - presumably they are provided elsewhere on the class;
        # confirm.
        for repo_name in self.repos.keys():
            try:
                repo_obj = self.manager.github.get_repo(repo_name)

                self.log.info("Got repo: %s" % repo_obj.name)
                self.repo_objects[repo_name] = repo_obj
            except UnknownObjectException:
                self.log.warn("Unknown repo: %s" % repo_name)

        self.do_init()
Exemple #3
0
import pprint
import simplekml
import math
import csv
import ast
import os
import sys
import random
import pymongo
from util.config import Config
from util import log
import subprocess
from util import lat_lng
import re

logger = log.getLogger(__name__)

# MongoDB client, database and collection handles. Every name is read from
# the "mongo" section of the project configuration.
cfg = Config()
# Only the ``pymongo`` module itself is imported above, so the client class
# is referenced through the module.
client = pymongo.MongoClient(cfg.get("mongo", "uri"))
db = client[cfg.get("mongo", "db_strava")]
segments_collection = db[cfg.get("mongo", "coll_segment")]
leaderboard_collection = db[cfg.get("mongo", "coll_leaderboards")]
zip_data_collection = db[cfg.get("mongo", "coll_zip")]
wban_collection = db[cfg.get("mongo", "coll_wban")]
weather_collection = db[cfg.get("mongo", "coll_weather")]

# Date and time format strings read from the "weather" and "strava" sections.
wban_date_format = cfg.get("weather", "date_format")
strava_datetime_format = cfg.get("strava", "date_time_format")
#-------------------------------------------------------------------------------
import sys, os
from report.google_polyline_encoder import decode
import simplekml
import datetime as dt
import csv
import pprint
import re
import pandas
from pymongo import MongoClient, GEOSPHERE
from util import log
from util.config import Config
from bson.code import Code
from bson.son import SON

# Module-level logger, named after this module.
logger = log.getLogger(__name__)

# MongoDB Client & DB
# The connection URI, database name and collection names are all read from
# the "mongo" section of the project configuration.
cfg = Config()
client = MongoClient(cfg.get("mongo", "uri"))
db = client[cfg.get("mongo", "db_strava")]
segments_collection = db[cfg.get("mongo", "coll_segment")]
leaderboard_collection = db[cfg.get("mongo", "coll_leaderboards")]
zip_data_collection = db[cfg.get("mongo", "coll_zip")]
weather_collection = db[cfg.get("mongo","coll_weather")]
wban_collection = db[cfg.get("mongo","coll_wban")]

# Date/time format strings from the "weather" and "strava" config sections.
wban_date_format = cfg.get("weather","date_format")
wban_time_format = cfg.get("weather","time_format")
strava_datetime_format = cfg.get("strava","date_time_format")
from config import config
from os import path
import pandas as pd
from openpyxl import load_workbook
from util import log

logging = log.getLogger('storage.log', log.INFO)


class Storage:
    def __init__(self, shop_name):
        """Remember the shop name and derive the path of its workbook.

        :param shop_name: name of the shop; it is also the base name of the
            ``.xlsx`` file placed under ``config.output``.
        """
        self.shop_name = shop_name
        workbook_name = shop_name + '.xlsx'
        self.output = path.join(config.output, workbook_name)

    def store(self, data, collect_name, distance):
        try:
            logging.info('准备存储: %s' % (self.output))
            writer = ''

            # 只对sheet进行操作, 避免覆盖其他sheet
            if path.exists(self.output):
                book = load_workbook(self.output)
                writer = pd.ExcelWriter(self.output, engine='openpyxl')
                writer.book = book
                writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            else:
                writer = pd.ExcelWriter(self.output, engine='openpyxl')

            sheet_name = str(distance) + str(
                config.content_zh_CN[collect_name])
Exemple #6
0
# -*-coding:utf-8-*-
from urllib.parse import urlencode
import requests
import time
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from util import log
import platform

logging = log.getLogger('api.log')


# Baidu Maps API
class Handler:
    """Holds an API key together with the base request parameters."""

    # Maximum number of records per request
    page_size = 20

    # Maximum number of errors per run
    error_max = 10

    def __init__(self, p_key):
        """Store the secret key and build the base parameter map.

        :param p_key: the key; kept as ``self.key`` and sent as ``ak``.
        """
        self.key = p_key
        self.raw_map = dict(
            ak=p_key,
            output='json',
            scope='2',
            page_size=self.page_size,
        )
Exemple #7
0
# -*-coding:utf-8-*-
# chdir must be the first import: importing it changes the environment
import chdir
from spider import controllor
from baidu.api import ApiError
import pandas as pd
import numpy as np
from config import config
from util import log
from util import readkey
from util import pid
import time
import sys
import platform

logging = log.getLogger('deamon.log', log.INFO)


class Deamon:
    def __init__(self):
        """Report the platform, then load the key, file names and controller.

        Passes the platform architecture string to ``self.s_print``, resets
        the success counter, records the current day of the month, reads the
        API key via ``readkey.get_key()``, copies the shop/complete file names
        from ``config``, calls ``self._read_list()`` and creates the main
        controller with the key.
        """
        # platform.architecture() returns a tuple of strings; join for display.
        environment = ','.join(platform.architecture())
        self.s_print('当前环境: %s\n' % (environment))

        logging.info('Deamon初始化')
        self.complete_success_num = 0
        # Day of the month (as an int) at construction time.
        self.localday = int(time.strftime('%d', time.localtime()))
        self.ak_key = readkey.get_key()
        self.shop_filename = config.shop_filename
        self.complete_filename = config.complete_filename
        # NOTE(review): presumably loads the lists named by the two file
        # names above - confirm against _read_list.
        self._read_list()
        self.main_controller = controllor.Controller(self.ak_key)
Exemple #8
0
import zmq
import threading
import json
import time
from collections import defaultdict
from util.log import getLogger
moduleLogger = getLogger(__name__)


class HandlerNotFound(Exception):
    """Error type for a handler that cannot be found; adds nothing to Exception."""


class InvalidArguements(Exception):
    """Error type for invalid arguments; adds nothing to Exception."""


class RPCError(Exception):
    """Error type for RPC failures; adds nothing to Exception."""


class Message(object):
    """Container for a key/value payload."""

    def __init__(self):
        """Start with an empty payload."""
        self._payload = dict()

    @property
    def payload(self):
        """The payload dict itself (not a copy)."""
        return self._payload

    def setOption(self, key, value):
        """Store ``value`` under ``key`` in the payload."""
        self._payload.update({key: value})
# Directory containing this file; its parent directory is appended to
# sys.path so that modules located there become importable.
curdir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(curdir, os.pardir))

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# the default string encoding can be switched to UTF-8.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding("utf-8")
    # raise "Must be using Python 3"

import numpy as np
from config import W2V_CONFIG
from util import log
from gensim.models import keyedvectors

# logger
logger = log.getLogger(__file__)


def load_model(model_file=W2V_CONFIG["model"], binary=False):
    """
    Load a model from a file in the C word2vec format.

    :param model_file: path of the model file; defaults to
        ``W2V_CONFIG["model"]``.
    :param binary: forwarded to gensim's loader as ``binary``.
    :return: the result of ``KeyedVectors.load_word2vec_format``.
    :raises Exception: if ``model_file`` does not exist.
    """
    if os.path.exists(model_file):
        loader = keyedvectors.KeyedVectors.load_word2vec_format
        return loader(model_file, binary=binary, unicode_errors='ignore')
    raise Exception("Model file does not exist.")


def resolve_embedding_size(text_format_w2v_model):
    '''
    Get word2vec embedding size with text format model
# -*-coding:utf-8-*-
from config import config
from spider import collector
from spider import storage
from util import log


logging = log.getLogger('controller.log', log.INFO)


class Controller:
    def __init__(self, ak_key):
        """Read the content configuration and create the collector.

        :param ak_key: key handed on to ``collector.Collector``.
        """
        # Load configuration
        self.content = config.content
        self.collector = collector.Collector(ak_key)

    def run(self, shop_name, real_name, shop_location, save_handler,
            filter=[]):
        # 成功爬取的结果
        successful_result = filter
        # 构造存储器
        self.storage = storage.Storage(shop_name)
        for key in self.content:
            try:
                collect_name = key
                # 如果当前待爬内容在filter过滤器中, 跳过
                if collect_name in filter:
                    continue
                # 遍历所需内容
                logging.info('开始爬取 店名: %s, 待爬内容: %s' % (shop_name, key))
# -*-coding:utf-8-*-
import baidu.api as api
import pandas as pd
from config import config
from util import log

logging = log.getLogger('collector.log', log.INFO)


# Safe lookup of a value in a map
def s_get(map, key):
    """Return ``map[key]`` when ``key`` is in ``map``, otherwise ``''``."""
    return map[key] if key in map else ''


class Collector:
    def __init__(self, ak_key):
        """Create the map API handler from the given key.

        :param ak_key: key passed to ``api.Handler``.
        """
        # Initialisation
        handler = api.Handler(ak_key)
        self.map_handler = handler

    def run(self, collect_name, shop_name, shop_location, distance):
        # 运行某个收集器
        if collect_name == 'house':
            return self.collect_house(shop_name, shop_location, distance)
        elif collect_name == 'hotel':
            return self.collect_hotel(shop_name, shop_location, distance)
        elif collect_name == 'metro':
            return self.collect_metro(shop_name, shop_location, distance)
        elif collect_name == 'bus':
from util.config import config
from util.log import getLogger
from pubsub import Publisher
from slack import SlackClient
import time
from queue import Queue
from queue import Empty as QueueEmpty
from threading import Thread, Event
import re
import json

_log = getLogger('slack_reader')


class Reader(Thread):
    def __init__(self, token):
        """Create the Slack client and the reader's state.

        :param token: token handed to ``SlackClient``.
        """
        # Slack client built from the token
        self._client = SlackClient(token)
        self._output = Queue()
        self._exit = Event()
        self._read_int = 1
        super().__init__()
        # Daemon thread: it does not keep the process alive on exit.
        self.daemon = True
        self._log = _log.getChild('reader')
        identity = self._retrieve_id()
        self._id, self._user = identity
        self._channel_cache = {}

    def _handle_event(self, event):
        self._log.debug('got event type: %s' % event['type'])
        self._output.put(event)

    def _retrieve_id(self):
Exemple #13
0
import websocket
import requests
from util.config import config
from util.log import getLogger
from queue import Queue, Empty
from threading import Event, Thread
import json

_log = getLogger('slack')


def build_url(method):
    """Return the Slack API URL for ``method``."""
    api_root = 'https://slack.com/api/'
    return api_root + method


def reqOk(resp):
    """Tell whether a response's JSON body reports success.

    :param resp: object with a ``json()`` method (e.g. a requests response).
    :return: ``True`` if the decoded body is a dict with a truthy ``'ok'``
        entry, ``False`` otherwise.

    The body is decoded once instead of twice. A body that is not a JSON
    object (for example a list) gives ``False`` rather than an
    ``AttributeError``. Errors raised by ``resp.json()`` itself still
    propagate to the caller.
    """
    payload = resp.json()
    return isinstance(payload, dict) and bool(payload.get('ok'))


def call_method(token, method, **kwargs):
    """POST to a Slack API method.

    :param token: sent as the ``token`` form field.
    :param method: API method name, turned into a URL by ``build_url``.
    :param kwargs: further form fields sent with the request.
    :return: tuple ``(ok, body)``: the ``reqOk`` verdict and the decoded
        JSON body.
    """
    form = dict(kwargs, token=token)
    response = requests.post(build_url(method), data=form)
    succeeded = reqOk(response)
    return succeeded, response.json()


class WSocket(Thread):
    def __init__(self, ws, connected):
        self._connected = connected
        self._ws = ws