import sys
import traceback
import datetime

import MySQLdb

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"
import mysql_connecter

mysql_connecter = mysql_connecter.mysql_connecter()

log_obj = set_log.Logger('data_cleaner.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('data_cleaner.log', if_cleanup=True)  # whether to clear the log file before each run


class data_cleaner(object):
    def __init__(self):
        pass

    def get_data(self, length=100):
        """每次只会读取100条数据,若是长时间没有清洗过数据了,需要更改这个数值"""
        sql = r"SELECT `key`, `detail` FROM `monitor` WHERE `parcel_no` <> ''"  # LIMIT %s [length,],
        data = mysql_connecter.connect(sql,
                                       dbname='spider',
                                       ip='116.62.230.38',
                                       user='******',
                                       password='******')
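        # The fragment is cut off here; presumably the rows are handed back
        # to the caller (an assumption, not shown in the original):
        return data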
Example #2
    @time: 2017/11/28 13:39
--------------------------------
"""
import sys
import os

import datetime
import pandas as pd
import numpy as np

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('calculation.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('calculation.log', if_cleanup=True)  # whether to clear the log file before each run


class calculation(object):
    def __init__(self):
        pass

    def earnings_cal(self, fund_code, df, **kwarg):
        """

        :param fund_code: 净值的数列
        :param df:
        :param kwarg:
        :return:
        """
Example #3
import sys
import pandas as pd
import numpy as np
import time
import chardet
import xlwt
from contextlib import closing
import pymysql

sys.path.append(sys.prefix + "\\Lib\\MyWheels")

reload(sys)
sys.setdefaultencoding('utf-8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('stock_capital_flow.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('stock_capital_flow.log',
                if_cleanup=True)  # whether to clear the log file before each run

import requests_manager
requests_manager = requests_manager.requests_manager()
import xls_manager
xls_manager = xls_manager.xls_manager()
print sys.getdefaultencoding()
token = '1942f5da9b46b069953c873404aad4b5'


class stock_capital_flow(object):
    def __init__(self):
        pass
Example #4
import sys
import json
import numpy as np
import re
import time

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"
import mysql_connecter
import data_cleaner
data_cleaner = data_cleaner.data_cleaner()
mysql_connecter = mysql_connecter.mysql_connecter()
log_obj = set_log.Logger('DF_reader.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('DF_reader.log', if_cleanup=True)  # whether to clear the log file before each run

with open('df_rename.json') as f:
    df_rename = json.load(f, encoding='utf8')

city_list = [
    u'杭州', u'宁波', u'绍兴', u'湖州', u'嘉兴', u'金华', u'衢州', u'台州', u'丽水', u'舟山'
]

# If the header row contains any of the column titles below, replace them
title_replace = {
    u'杭州萧山': {
        u'编号': u'地块编号'
    },
    u'嘉兴': {
Example #5
import sys
import os

import datetime
import pandas as pd
import numpy as np
import re
import xlwt
import time

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('report_output.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('report_output.log', if_cleanup=True)  # whether to clear the log file before each run

import net_value_cal_display
import compare_net_value
import fund_holdings_display


class report_output(object):
    def __init__(self):
        self.net_value_cal_display = net_value_cal_display.net_value_cal_display()
        self.compare_net_value = compare_net_value.compare_net_value()
        self.fund_holdings_display = fund_holdings_display.fund_holdings_display()
Example #6
import sys
import re
import time
import json
import copy

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import requests_manager
requests_manager = requests_manager.requests_manager()
import api51
api51 = api51.api51()

import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('cal_fitting_net_value.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('cal_fitting_net_value.log',
                if_cleanup=True)  # whether to clear the log file before each run

user_key = '1923eae2a3054d4c92a7eb74d7f65396'

with open('stock_blacklist.txt', 'r') as f:
    stock_blacklist = f.read().split('\n')

d = {
    'prod_code': '000001.SS',
    'candle_mode': '0',
    'data_count': '1000',
    'get_type': 'offset',
    'search_direction': '1',
    'candle_period': '6',
}
Example #7
    @time: 2017/4/18 13:54
--------------------------------
"""
import sys
import os
import datetime
import pymail
import time
import csv

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('controller.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('controller.log', if_cleanup=False)  # whether to clear the log file before each run

file_name = 'NEW.csv'  # new announcements will be stored in this file


class controller(object):
    def __init__(self):
        self.pymail = pymail.pymail()

    def initialize(self):
        """删除旧的公告发布数据"""
        if os.path.exists(file_name):
            os.remove(file_name)

    def start_spider(self, spider_id):
Example #8
import sys
import pandas as pd
import numpy as np
from contextlib import closing
import pymysql
import datetime
import xlrd
import xlwt
from xlutils.copy import copy
import re

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('fund_holdings_display.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('fund_holdings_display.log',
                if_cleanup=True)  # whether to clear the log file before each run


class fund_holdings_display(object):
    def __init__(self):
        sql = """
        SELECT `fund_code`, `fund_name`
        FROM `fund_info`
        """
        with closing(
                pymysql.connect('10.10.10.15',
                                'spider',
                                'jlspider',
                                'spider',
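                                charset='utf8')) as conn:
            # The call is cut off in the source; the closing argument and
            # the body below are an assumed sketch (pymysql.connect takes
            # host, user, password, db positionally), loading the fund list
            # for later lookups:
            self.fund_info = pd.read_sql(sql, conn)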
Example #9
import sys
import datetime
import pandas as pd
import numpy as np

import api51
api51 = api51.api51()
from contextlib import closing
import pymysql
import copy

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('mock_trading.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('mock_trading.log', if_cleanup=True)  # whether to clear the log file before each run

user_key = '1923eae2a3054d4c92a7eb74d7f65396'

# d = {
#     'prod_code':'000001.SS',
#     'candle_mode':'0',
#     'data_count':'1000',
#     'get_type':'offset',
#     'search_direction':'1',
#     'candle_period':'6',
# }
# json_data = api51.connect(user_key, d)
# df = pd.DataFrame(json_data['data']['candle']['000001.SS'])
# date_ser = df[0].apply(lambda num:datetime.datetime.strptime(str(num),'%Y%m%d'))
Example #10
    @time: 2017/11/24 11:35
--------------------------------
"""
import sys
import os
import urllib2

import pandas as pd
import numpy as np

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('api51.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('api51.log', if_cleanup=True)  # whether to clear the log file before each run

user_key = '1923eae2a3054d4c92a7eb74d7f65396'


class api51(object):
    def __init__(self):
        pass

    def connect(self, appcode, querys, path='/kline'):
        host = 'http://stock.api51.cn'
        method = 'GET'
        #appcode = '343dff93ee6549daab7f1d6b8e244027'
        #'candle_mode=0&candle_period=1&data_count=10&date=date&end_date=end_date&fields=fields&get_type=offset&min_time=min_time&prod_code=000001.SS&search_direction=1&start_date=start_date'
        print "api51 querys:\n", '\n'.join(
Example #11
import sys
import requests
import bs4

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import html_table_reader
import PhantomJS_driver
import requests_manager

requests_manager = requests_manager.requests_manager()

PhantomJS_driver = PhantomJS_driver.PhantomJS_driver()
html_table_reader = html_table_reader.html_table_reader()

import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger(u'crawler全国青少年科技竞赛获奖名单公示.log',
                         set_log.logging.WARNING, set_log.logging.DEBUG)
log_obj.cleanup(u'crawler全国青少年科技竞赛获奖名单公示.log',
                if_cleanup=True)  # whether to clear the log file before each run


class crawler0(object):
    def __init__(self):
        pass

    def get_list(self):

        resp = requests.get('http://gs.cyscc.org.cn/')
        bs_obj = bs4.BeautifulSoup(resp.text, 'html.parser')
        e_table = bs_obj.find('table', class_='styledTable')

        e_trs = e_table.find_all('tr')[1:]
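        # The parsing loop is cut off in the source; a typical continuation
        # over the remaining rows (an illustrative sketch, not the
        # original code):
        rows = []
        for e_tr in e_trs:
            rows.append([e_td.get_text().strip()
                         for e_td in e_tr.find_all('td')])
        return rows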
Example #12
"""

import pymysql as mysql
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import set_log
import pandas as pd
import numpy as np
from itertools import chain

#import sqlalchemy
#import sqlalchemy.ext.declarative
#import sqlalchemy.orm

log_obj = set_log.Logger('mysql_connecter.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('mysql_connecter.log', if_cleanup=True)  # whether to clear the log file before each run


class mysql_connecter(object):
    def __init__(self):
        pass

    def connect(self,
                sql,
                args=None,
                host='localhost',
                user='******',
                password='******',
                dbname='spider',
                charset='utf8'):
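        # The body is cut off in the source. A minimal sketch of what such
        # a helper typically does (an assumption, not the original code):
        conn = mysql.connect(host=host, user=user, password=password,
                             db=dbname, charset=charset)
        try:
            return pd.read_sql(sql, conn, params=args)
        finally:
            conn.close()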
Example #13
import sys
import os

import datetime
import pandas as pd
import numpy as np
import time
from contextlib import closing
import pymysql

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('update_excel.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('update_excel.log', if_cleanup=True)  # whether to clear the log file before each run


class update_excel(object):
    def __init__(self):
        pass

    def get_data(self, code, date1, date2):
        date1 = date1.strftime('%Y-%m-%d')
        date2 = date2.strftime('%Y-%m-%d')
        sql = 'SELECT `crawler_key`, `fund_code`, `value_date`, `accumulative_net_value` FROM `eastmoney_daily_data` WHERE `fund_code` = "%s" AND (`value_date` BETWEEN "%s" AND "%s")' % (
            code, date1, date2)
        with closing(
                pymysql.connect('10.10.10.15',
                                'spider',
Example #14
    @time: 2017/2/21 9:38
--------------------------------
"""
import sys
import os
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('decision_tree.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('decision_tree.log', if_cleanup=True)  # whether to clear the log file before each run

print("""


这里使用的是ID3算法来构造决策树


""")


class decision_tree(object):
    def __init__(self, target_type):
        print("===》需要分类的列为 %s 列" % target_type)
        self.target_type = target_type
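
    def entropy(self, df):
        # Illustrative sketch, not part of the original fragment: ID3
        # chooses split columns by Shannon entropy over the target column,
        # roughly as below (method name and placement are assumptions).
        counts = df[self.target_type].value_counts()
        probs = counts / float(len(df))
        return -(probs * np.log2(probs)).sum()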
Example #15
import pandas as pd
import requests
import selenium.webdriver
import string
import os
import time
import sys


sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"
import csv_report

log_obj = set_log.Logger('hnfgw_spider.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('hnfgw_spider.log', if_cleanup=True)  # whether to clear the log file before each run

key_list = [u'项目名称', u'开发公司', u'所在区域', u'容积率', u'规划总建筑面积(平方米)',
            u'未售总套数', u'未售总面积(平方米)']

class hnfgw_spider(object):
    def __init__(self):
        self.csv_report = csv_report.csv_report()
        self.headers = {
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.8',
            'Cache-Control': 'max-age=0',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
            'Connection': 'keep-alive'
        }
Example #16
    @Contact: [email protected]
    @file: Logistic_regression.py
    @time: 2017/12/28 16:32
--------------------------------
"""
import sys
import os
import pandas as pd
import numpy as np

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('Logistic_regression.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('Logistic_regression.log', if_cleanup=True)  # whether to clear the log file before each run


class Logistic_regression(object):

    def __init__(self):
        pass

    def gradient_ascent(self, df, class_col):
        alpha = 0.001
        max_cycles = 500

        target_col = df[class_col].copy()
        df = df.drop([class_col,], axis=1)
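
        # The rest of the method is cut off in the source. To make the
        # sketch inside the loop below self-contained, initialise a weight
        # vector first (an assumed choice, not the original code):
        weights = np.ones(df.shape[1])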
        for i in range(max_cycles):
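            # A standard batch gradient-ascent step for logistic regression
            # (an illustrative sketch; the original loop body is not shown):
            h = 1.0 / (1.0 + np.exp(-np.dot(df.values, weights)))   # sigmoid of X.w
            error = target_col.values - h                           # label minus prediction
            weights = weights + alpha * np.dot(df.values.T, error)  # step along the gradient
        return weights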
Example #17
import sys
import os
import traceback
import json
import pandas as pd
import bs4

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"
import driver_manager
import requests_manager
requests_manager = requests_manager.requests_manager()
driver_manager = driver_manager.driver_manager()

log_obj = set_log.Logger('crawler.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('crawler.log', if_cleanup=True)  # whether to clear the log file before each run

class crawler(object):
    def __init__(self):
        with open('page_count.json', 'r') as f:
            self.page_count = json.load(f)
        self.used_urls = set()

    def main(self):
        #s_list = self.get_urls()
        with open('urls.txt', 'r') as f:
            s = f.read()
            s_list = s.split('\n')

        for s0 in s_list:
Example #18
    @Contact: [email protected]
    @file: command.py
    @time: 2017/6/23 13:59
--------------------------------
"""
import re
import sys
import os
import sqlite3

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('command.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('command.log', if_cleanup=True)  # whether to clear the log file before each run

#'CREATE TABLE Double_Check_URL(`id` INT(10) PRIMARY KEY NOT NULL AUTO_INCREMENT, `insert_time` timestamp not null default current_timestamp, `url` VARCHAR(255))'

class command(object):
    def __init__(self):
        pass

    def check_url(self, url):
        sql = "SELECT * FROM `Double_Check_URL` WHERE `url` = \"%s\"" % url
        resp = self.connect(sql)
        # any row tuple in the response means the URL has been seen before
        m = re.search(r'\([^\(\)]+?\)', str(resp))
        if m:
            return True
        else:
            return False
Example #19
import sys
from contextlib import closing

import cal_fitting_net_value
cal_fitting_net_value = cal_fitting_net_value.cal_fitting_net_value()
import requests_manager
requests_manager = requests_manager.requests_manager()
import json
import time
import calculation
calculation = calculation.calculation()

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('compare_net_value.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('compare_net_value.log',
                if_cleanup=True)  # whether to clear the log file before each run

user_key = '1923eae2a3054d4c92a7eb74d7f65396'


class compare_net_value(object):
    def __init__(self):
        pass

    def get_net_value(self, fund_code, date_str1, date_str2):
        sql = "SELECT `fund_code`, `value_date`, `accumulative_net_value` FROM `eastmoney_daily_data` " \
              "WHERE `fund_code` = '%s' AND `value_date` BETWEEN '%s' AND '%s'" %(fund_code, date_str1, date_str2)

        with closing(
Example #20
import sys
import os
import announcements_monitor.items
import re
import traceback
import datetime
import bs4
import json
log_path = r'%s/log/spider_DEBUG(%s).log' % (
    os.getcwd(), datetime.datetime.date(datetime.datetime.today()))

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"
import csv_report

log_obj = set_log.Logger(log_path, set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup(log_path, if_cleanup=False)  # whether to clear the log file before each run
csv_report = csv_report.csv_report()
"""
bs = bs4.BeautifulSoup(s,'html.parser')
e_trs = bs.find_all('tr')
e_trs[0].get_text()
"""
with open(os.getcwd() + r'\announcements_monitor\spiders\needed_data.txt',
          'r') as f:
    s = f.read()
    needed_data = s.split(',')
needed_data = [s.encode('utf8') for s in needed_data]

title_type1 = [
    'parcel_no', 'parcel_location', '用地面积(㎡)', 'offer_area_m2', 'purpose',
Example #21
"""
import sys
import os

import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates

sys.path.append(sys.prefix + "\\Lib\\MyWheels")
reload(sys)
sys.setdefaultencoding('utf8')
import set_log  # log_obj.debug(text)  "\x1B[1;32;41m (text)\x1B[0m"

log_obj = set_log.Logger('plot_manager.log', set_log.logging.WARNING,
                         set_log.logging.DEBUG)
log_obj.cleanup('plot_manager.log', if_cleanup=True)  # whether to clear the log file before each run


class plot_manager(object):

    def __init__(self):
        pass

    def two_axis(self, df, ax_cols1, ax_cols2, **kwargs):
        fig = plt.figure()
        ax1 = fig.add_subplot(111)

        if 'x_axis' in kwargs:
            df = df.set_index(kwargs['x_axis'])
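        # The rest of the method is cut off in the source. A two-axis plot
        # along these lines usually continues with twinx(); the sketch
        # below is an assumption, not the original code:
        df[ax_cols1].plot(ax=ax1)
        ax2 = ax1.twinx()
        df[ax_cols2].plot(ax=ax2)
        return fig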