def __init__(self, root_cat, depth, log_file, output_dir, root_dir):
    """Set up loggers, collaborators, seed state and output paths.

    root_cat   -- root Wikipedia category the extraction starts from
    depth      -- maximum category depth to descend
    log_file   -- shared log file for every component logger
    output_dir -- directory where corpora and model files are written
    root_dir   -- project root (CRF templates live under it)
    """
    # One DEBUG logger per collaborating class, all writing to log_file.
    self._logger = mylogger.get_logger(
        DistantExtractor.__name__, log_file, mylogger.DEBUG)
    io_logger = mylogger.get_logger(
        FileIO.__name__, log_file, mylogger.DEBUG)
    wiki_logger = mylogger.get_logger(
        WikipediaExtractor.__name__, log_file, mylogger.DEBUG)
    morph_logger = mylogger.get_logger(
        MorphemeTagger.__name__, log_file, mylogger.DEBUG)

    # Collaborating helper objects.
    self._file_io = FileIO(output_dir, io_logger)
    self._wiki_extractor = WikipediaExtractor(wiki_logger, self._file_io)
    self._morpheme_tagger = MorphemeTagger(morph_logger, root_dir)

    # Extraction parameters.
    self._root_cat = root_cat
    self._limit_depth = depth

    # TODO: eventually hold seeds for multiple classes — a dict keyed by
    # name with a list of seeds as value (the labeling stage already
    # works that way).
    self._seed_name = 'Car'
    self._seeds = list()
    self._categories = [self._root_cat]

    # Directory names used by the pipeline stages.
    self._seed_dir = 'seeds'
    self._unlabeled_dir = 'unlabeled_corpora'
    self._cleaned_dir = 'cleaned_corpora'
    self._mecab_dir = 'mecab_corpora'
    self._labeled_dir = 'labeled_corpora'
    self._train_dir = 'train_corpora'
    self._output = 'output'
    self._temp_dir = 'temp'

    # Fixed file locations derived from root_dir / output_dir.
    self._templatefile = '%s/templates/template' % root_dir
    self._trainfile = '%s/train.txt' % output_dir
    self._decodefile = '%s/decode.txt' % output_dir
    self._modelfile = '%s/model' % output_dir
    self._all_labeledfile = '%s/all_labeled.txt' % output_dir
def __init__(self, blog, limit_start=0, num=30, threads_num=10,
             need_save=True, save_path=None, img_re=None,
             total_post_re=None, max_posts=None, proxies=None):
    """Configure a Tumblr photo scraper for *blog*.

    blog          -- tumblr blog name, used to build the JSON API url
    limit_start   -- post offset to start fetching from
    num           -- posts requested per API page
    threads_num   -- number of worker threads
    need_save     -- when True images are saved under save_path,
                     otherwise their urls are only logged
    save_path     -- target directory for downloaded images
    img_re        -- regex extracting image urls (default: 1280px photos)
    total_post_re -- regex extracting the blog's total post count
    max_posts     -- hard cap on posts to scan (None = no cap)
    proxies       -- optional requests-style proxy mapping
    """
    self.blog = blog
    self.base_url = "http://%s.tumblr.com/api/read/json?start=" % self.blog

    # Fall back to the built-in patterns when none are supplied.
    self.total_post_re = total_post_re or re.compile(
        r'"posts-total":(\d+),')
    self.img_re = img_re or re.compile(
        r'photo-url-1280":"(http.*?)",')

    self.total_posts = 0
    self.max_posts = max_posts
    self.limit_start = limit_start
    self.num = num

    self.need_save = need_save
    if need_save:
        self.save_path = save_path
        self._check_save_path()
    else:
        # Not saving: log image urls instead of downloading them.
        from mylogger import get_logger
        self.imglog = get_logger("imgurl")

    self.proxies = proxies
    self.img_queue = Queue()
    self.post_queue = Queue()
    self.threads_num = threads_num
def main():
    """Bootstrap and run the self-checkout GUI application."""
    logger = get_logger(__name__)
    logger.debug("start self_checkout_machine")

    # Load application-wide settings before any window is created.
    settings.init()

    app = QtGui.QApplication(sys.argv)
    window = main_window.MainWindow()
    window.goodShow()
    window.check_devices()

    # Hand control to the Qt event loop; exit with its return code.
    sys.exit(app.exec_())
def __init__(self, blog, limit_start=0, num=30, threads_num=10,
             need_save=True, save_path=None, img_re=None,
             total_post_re=None, max_posts=None, proxies=None):
    """Initialise the Tumblr image fetcher for one blog.

    blog          -- tumblr blog name, used to build the JSON API url
    limit_start   -- post offset to start fetching from
    num           -- posts requested per API page
    threads_num   -- number of worker threads
    need_save     -- save images under save_path when True; otherwise
                     only log their urls
    save_path     -- target directory for downloaded images
    img_re        -- regex extracting image urls (default: 1280px photos)
    total_post_re -- regex extracting the blog's total post count
    max_posts     -- hard cap on posts to scan (None = no cap)
    proxies       -- optional requests-style proxy mapping
    """
    self.blog = blog
    self.base_url = "http://%s.tumblr.com/api/read/json?start=" % self.blog

    # Use the caller's regexes when given, else compile the defaults.
    if total_post_re:
        self.total_post_re = total_post_re
    else:
        self.total_post_re = re.compile(r'"posts-total":(\d+),')
    if img_re:
        self.img_re = img_re
    else:
        self.img_re = re.compile(r'photo-url-1280":"(http.*?)",')

    self.total_posts = 0
    self.max_posts = max_posts
    self.limit_start = limit_start
    self.num = num

    self.need_save = need_save
    if self.need_save:
        self.save_path = save_path
        self._check_save_path()
    else:
        # No saving requested: record image urls through a logger.
        from mylogger import get_logger
        self.imglog = get_logger("imgurl")

    self.proxies = proxies
    self.img_queue = Queue()
    self.post_queue = Queue()
    self.threads_num = threads_num
#!/usr/bin/env python #-*-coding:utf-8-*- """ 通用程序 """ import sys import os import requests from mylogger import get_logger dllog = get_logger("app") try: reload(sys) sys.setdefaultencoding('utf-8') except NameError: # The only supported default encodings in Python are: # Python 2.x: ASCII # Python 3.x: UTF-8 # So no need to sys.setdefaultencoding('utf-8') pass # py3 # 执行 requests 的数据下载 def download_page(url, ret_json=False, proxies=None): if not url: dllog.info("url should not be None") return '' try:
import re from mylogger import get_logger reload(sys) sys.setdefaultencoding('utf-8') DBHOST = "localhost:3306" SCHEMA = "CAIPIAO" DBUSER = "******" DBPASSWD = "passwd" db = torndb.Connection(host=DBHOST, database=SCHEMA, user=DBUSER, password=DBPASSWD) cplog = get_logger("caipiao") class Data_Sync(object): ssc_re = re.compile( r'<td class=\'gray\'>(.*?)</td>(<td class=\'red big\'>|<td style=\'width:65px\'>)(.*?)</td>.*?<tr>' ) def __init__(self, start_date="20150101", sleep_secs=10, run_ever=True, callback=None): self.start_date = start_date if start_date > "20130101" else "20150101" self.run_ever = run_ever self.base_url = "http://chart.cp.360.cn/kaijiang/kaijiang?lotId=255401&spanType=2&span="
import mylogger

print("here as well")
log = mylogger.get_logger(__name__)
print("here here")


def say_hello(name):
    """Emit a greeting to the debug log.

    NOTE(review): *name* is currently unused — the log message is
    fixed. Confirm whether it should be interpolated.
    """
    print("here too")
    log.debug("Greeting people.")
from flask import Flask, jsonify, make_response, request from flask_cors import CORS, cross_origin import json from Database import Database as db from mylogger import get_logger logger = get_logger() app = Flask(__name__) CORS(app, support_credentials=True) app.config["CORS_HEADERS"] = "Content-Type" @app.route("/api/insert", methods=["POST"]) @cross_origin(origin="*", headers=["Content-Type", "Authorization"]) def insert_data(): data = json.loads(request.data) logger.info(f"The request data to insert: {data}") count = db.get_count_by_date(data["date"]) if count == 1: logger.info(f"Updating only! Temperature key already exists!") db.update_date(data) else: logger.info(f"Inserting new entry!") # Insert db.insert_data(data)
email: [email protected] ''' import settings import redis from crawler import Crawler from crawleritem import CrawlerItem from StringIO import StringIO import lxml.html as LH from strategy import Strategy from mylogger import get_logger from datetime import * import hashlib import os logger = get_logger(name="worker", file_name="worker.log") class Worker(Crawler): def __init__(self, start_urls=None, template_cls=None, strategy=None, settings=None): super(Worker, self).__init__(start_urls=start_urls, template_cls=template_cls, strategy=strategy) logger.debug('Init Worker...') if settings is None: try: self.r = redis.StrictRedis(host='localhost', port=6379, db=0) logger.debug('Connection local redis') if self.r is None: raise redis.exceptions.ConnectionError except (redis.exceptions.ConnectionError), e: raise e else:
# -*- coding: utf-8 -*- ''' author: assassinpig email: [email protected] ''' import signal, os import time import settings import redis import threading from mylogger import get_logger logger = get_logger(name="master", file_name="master.log") class Master(object): @staticmethod def exit_handler(signum, frame): #print 'you press ctrl+c!' logger.debug('you press Ctrl+C!') pass def __init__(self, settings=None, start_urls=None, strategy=None): logger.debug('Init Master ...') if settings is None: try: logger.debug('Connect to local redis') self.r = redis.StrictRedis(host='localhost', port=6379, db=0) if self.r is None: raise redis.exceptions.ConnectionError except (redis.exceptions.ConnectionError), e:
#!/usr/bin/env python
#-*-coding:utf-8-*-
"""
1. Check whether an item is in stock (province / city supported;
   defaults to Chengdu, Sichuan).
2. Fetch the current price.

Input: an item url or a skuid.
Returns: (time.time(), in-stock flag, current price).
"""
import re
import time

import requests

from mylogger import get_logger

jdlog = get_logger('jd')


def run(url, provinceid=1, cityid=72):
    """Look up stock state and current price for the item at *url*.

    Returns a dict with last_update / stock_state / price / title,
    or None when no skuid can be parsed from *url*.
    """
    skuid = parse_url_for_skuid(url)
    if not skuid:
        return None

    title, stock_state = check_if_in_stock(skuid, provinceid, cityid)
    price = get_current_price(skuid)
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    return {
        "last_update": timestamp,
        "stock_state": stock_state,
        "price": price,
        "title": title
    }
email: [email protected] ''' import settings import redis from crawler import Crawler from crawleritem import CrawlerItem from StringIO import StringIO import lxml.html as LH from strategy import Strategy from mylogger import get_logger from datetime import * import hashlib import os logger = get_logger(name="worker", file_name="worker.log") class Worker(Crawler): def __init__(self, start_urls=None, template_cls=None, strategy=None, settings=None): super(Worker, self).__init__(start_urls=start_urls, template_cls=template_cls, strategy=strategy) logger.debug('Init Worker...') if settings is None: try: self.r = redis.StrictRedis(host='localhost', port=6379, db=0)
# LastChange: 2014-12-08 10:12:51 # History: #============================================================================= ''' """ 通用程序 """ import torndb import _mysql_exceptions import sys import requests from mylogger import get_logger ulog = get_logger("utils") applog = get_logger("app") reload(sys) sys.setdefaultencoding('utf-8') db = torndb.Connection(host="localhost", database="WANGYI", user="******", password="******") # 数据库执行 def insert_mysql(sql): try: db.insert(sql) return True except _mysql_exceptions.IntegrityError, e: """ 主键冲突,此处不算错误 """
#!/usr/bin/env python # -*-coding:utf-8-*- """ 1、检测商品是否有货(支持省、市,默认是四川成都); 2、获取当前价格; 输入值,商品 url 或者 skuid 返回值: (time.time(), 是否有货,当前价格) """ import re import time import requests from mylogger import get_logger jdlog = get_logger("jd") def run(url, provinceid=1, cityid=72): skuid = parse_url_for_skuid(url) if skuid: title, stock_state = check_if_in_stock(skuid, provinceid, cityid) price = get_current_price(skuid) return { "last_update": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "stock_state": stock_state, "price": price, "title": title, } def parse_url_for_skuid(url):
import time
from mylogger import get_logger
import mysql_python
from whois import whois
from whois import parser

try:
    import configparser  # py3
except ImportError:
    import ConfigParser as configparser  # py2

import pytz

# China's timezone (first zone listed for country code 'cn').
tz = pytz.timezone(pytz.country_timezones('cn')[0])

# All credentials and endpoints come from config.ini.
cf = configparser.ConfigParser()
cf.read("config.ini")

# One logger per concern.
dllog = get_logger("success")
responselog = get_logger("response")
errorlog = get_logger("error")
# NOTE(review): 'oreder' looks like a typo for 'order' — kept as-is
# because other modules may reference this name.
orederlog = get_logger("domains_orders")

# ResellerClub API credentials.
userid = cf.get("resellerclub", "userid")
apikey = cf.get("resellerclub", "apikey")

# MySQL connection settings.
mysqlhost = cf.get("db", "mysqlhost")
mysqluser = cf.get("db", "mysqluser")
mysqlpass = cf.get("db", "mysqlpass")
mysqldb = cf.get("db", "mysqldb")

mail_to = "[email protected],[email protected],[email protected]"

# Used by the earliest version; later the values are read from the
# database, and the variables below are only fall-back defaults for
# when that database read fails.
import torndb import sys import requests import re from mylogger import get_logger reload(sys) sys.setdefaultencoding('utf-8') DBHOST = "localhost:3306" SCHEMA = "CAIPIAO" DBUSER = "******" DBPASSWD = "passwd" db = torndb.Connection(host=DBHOST, database=SCHEMA, user=DBUSER, password=DBPASSWD) cplog = get_logger("caipiao") class Data_Sync(object): ssc_re = re.compile(r'<td class=\'gray\'>(.*?)</td>(<td class=\'red big\'>|<td style=\'width:65px\'>)(.*?)</td>.*?<tr>') def __init__(self, start_date="20150101", sleep_secs = 10, run_ever=True, callback=None): self.start_date = start_date if start_date > "20130101" else "20150101" self.run_ever = run_ever self.base_url = "http://chart.cp.360.cn/kaijiang/kaijiang?lotId=255401&spanType=2&span=" self.latest_date = '' self.latest_period = '' self.need_sleep = False self.sleep_secs = sleep_secs self.callback=callback def run(self):
# Version: 0.0.1 # LastChange: 2014-12-08 10:12:51 # History: #============================================================================= ''' """ 通用程序 """ import torndb import _mysql_exceptions import sys import requests from mylogger import get_logger ulog = get_logger("utils") applog = get_logger("app") reload(sys) sys.setdefaultencoding('utf-8') db = torndb.Connection(host="localhost", database="WANGYI", user="******", password="******") # 数据库执行 def insert_mysql(sql): try: db.insert(sql)