コード例 #1
0
ファイル: sw.py プロジェクト: kingofhawks/stocktrace
def parse_sw_history2(begin_date='2014-03-12', end_date=None, code='801150'):
    """Download the daily history of one SW index and return it as a DataFrame.

    Pages through ``http://www.swsindex.com/handler.aspx`` (table ``V_Report``)
    until a page comes back without rows.  Rows whose ``PE`` or ``PB`` is an
    empty string are dropped, ``BargainDate`` is parsed into timestamps and
    ``PE``/``PB`` are converted to float.

    Args:
        begin_date: Lower bound (inclusive) for ``BargainDate``; interpolated
            verbatim into the ``where`` clause sent to the handler.
        end_date: Upper bound (inclusive) for ``BargainDate``.  Defaults to
            the current date taken from ``arrow.now()``.
        code: Value matched against ``swindexcode``.

    Returns:
        The cleaned DataFrame, or ``None`` when the collected rows contain no
        ``PE`` column (for example when no rows were returned at all).
    """
    if end_date is None:
        end_date = str(arrow.now().date())
    condition = "swindexcode='{}' and BargainDate>='{}' and BargainDate<='{}' and type='Day'"
    where = condition.format(code, begin_date, end_date)
    url = 'http://www.swsindex.com/handler.aspx'

    all_data = []
    for index in range(1, 1000):
        payload = {
            'tablename': 'V_Report',
            'key': 'id',
            'p': index,
            'where': where,
            'orderby': 'swindexcode asc,BargainDate_1',
            'fieldlist': 'SwIndexCode,SwIndexName,BargainDate,CloseIndex,BargainAmount,Markup,'
                         'TurnoverRate,PE,PB,MeanPrice,BargainSumRate,DP',
            'pagecount': 993,
            'timed': 1456667319778,
        }
        res = requests.post(url, data=payload)
        # Single quotes are swapped for double quotes before parsing --
        # presumably the handler emits single-quoted strings that json.loads
        # would otherwise reject.
        data = res.text.replace("'", '"')
        print(data)
        data_list = json.loads(data).get('root')
        # A missing 'root' key yields None; treat it like an empty page
        # instead of failing on len(None).
        if not data_list:
            break
        all_data.extend(data_list)

    df = DataFrame(all_data)
    if 'PE' not in df:
        return None

    # Keep only rows with both PE and PB present.  The explicit copy makes the
    # column assignments below operate on an independent frame rather than on
    # a slice of the original (avoids pandas' SettingWithCopyWarning).
    has_values = (df['PE'] != '') & (df['PB'] != '')
    df = df[has_values].copy()

    # convert string to datetime(timestamp)
    df['BargainDate'] = pd.to_datetime(df['BargainDate'])

    # convert string to float
    df[['PE', 'PB']] = df[['PE', 'PB']].astype(float)
    print(df)
    return df
コード例 #2
0
ファイル: parse.py プロジェクト: smartree/stocktrace
def parse_sw_history2(begin_date='2014-03-12', end_date=None, code='801150'):
    """Download the daily history of one SW index and return it as a DataFrame.

    Pages through ``http://www.swsindex.com/handler.aspx`` (table ``V_Report``)
    until a page comes back without rows.  Rows whose ``PE`` or ``PB`` is an
    empty string are dropped, ``BargainDate`` is parsed into timestamps and
    ``PE``/``PB`` are converted to float.

    Args:
        begin_date: Lower bound (inclusive) for ``BargainDate``; interpolated
            verbatim into the ``where`` clause sent to the handler.
        end_date: Upper bound (inclusive) for ``BargainDate``.  Defaults to
            the current date taken from ``arrow.now()``.
        code: Value matched against ``swindexcode``.

    Returns:
        The cleaned DataFrame, or ``None`` when the collected rows contain no
        ``PE`` column (for example when no rows were returned at all).
    """
    if end_date is None:
        end_date = str(arrow.now().date())
    condition = "swindexcode='{}' and BargainDate>='{}' and BargainDate<='{}' and type='Day'"
    where = condition.format(code, begin_date, end_date)
    url = 'http://www.swsindex.com/handler.aspx'

    all_data = []
    for index in range(1, 1000):
        payload = {
            'tablename': 'V_Report',
            'key': 'id',
            'p': index,
            'where': where,
            'orderby': 'swindexcode asc,BargainDate_1',
            'fieldlist': 'SwIndexCode,SwIndexName,BargainDate,CloseIndex,BargainAmount,Markup,'
                         'TurnoverRate,PE,PB,MeanPrice,BargainSumRate,DP',
            'pagecount': 1,
            'timed': 1456667319778,
        }
        res = requests.post(url, data=payload)
        # Single quotes are swapped for double quotes before parsing --
        # presumably the handler emits single-quoted strings that json.loads
        # would otherwise reject.
        data = res.text.replace("'", '"')
        data_list = json.loads(data).get('root')
        # A missing 'root' key yields None; treat it like an empty page
        # instead of failing on len(None).
        if not data_list:
            break
        all_data.extend(data_list)

    df = DataFrame(all_data)
    # With no rows collected the frame has no columns at all, so indexing
    # df['PE'] below would raise KeyError; report "no data" as None instead.
    if 'PE' not in df:
        return None

    # Keep only rows with both PE and PB present.  The explicit copy makes the
    # column assignments below operate on an independent frame rather than on
    # a slice of the original (avoids pandas' SettingWithCopyWarning).
    has_values = (df['PE'] != '') & (df['PB'] != '')
    df = df[has_values].copy()

    # convert string to datetime(timestamp)
    df['BargainDate'] = pd.to_datetime(df['BargainDate'])

    # convert string to float
    df[['PE', 'PB']] = df[['PE', 'PB']].astype(float)
    print(df)
    return df
コード例 #3
0
ファイル: column.py プロジェクト: laura-dietz/minir-plots
def main():
    """Plot the per-run mean of ``args.metric`` as a bar chart with error bars.

    Reads every evaluation file named in ``args.runs``, computes the mean and
    standard error of ``args.metric`` over the retained queries, prints a
    summary table, and saves a bar chart to ``args.out``.  Queries whose
    ``num_rel`` value is zero or NaN in any run, and the aggregate ``all``
    row, are excluded.  Uses ``args.format``, ``args.metric``, ``args.runs``,
    ``args.out``, ``args.c`` and ``args.sort`` from the module-level parser.

    Raises:
        ValueError: If ``args.format`` is neither ``galago_eval`` nor
            ``trec_eval`` (case-insensitive).
    """
    mpl.use("Agg")

    def read_ssv(fname):
        """Return the whitespace-split rows of *fname* as (query, metric, value, ...).

        ``galago_eval`` rows are returned as read; ``trec_eval`` rows get
        their first two columns swapped so both formats share one layout.
        """
        # The context manager closes the handle as soon as the rows are read.
        with open(fname, 'r') as handle:
            lines = [line.split() for line in handle]
        fmt = args.format.lower()
        if fmt == 'galago_eval':
            return lines
        if fmt == 'trec_eval':
            return [[line[1], line[0]] + line[2:] for line in lines]
        # Previously fell through and returned None, which only surfaced
        # later as an opaque "NoneType is not iterable" error.
        raise ValueError("unsupported eval format: " + args.format)

    def readNumQueries(run):
        """Return the integer value of the ``all`` / ``num_q`` row of *run*."""
        tsv = read_ssv(run)
        data = [int(row[2]) for row in tsv if row[0] == "all" and row[1] == numQueries_key]
        return data[0]

    def findQueriesWithNanValues(run):
        """Return the set of queries whose ``num_rel`` is 0.0 or NaN in *run*."""
        tsv = read_ssv(run)
        queriesWithNan = {row[0] for row in tsv if row[1] == 'num_rel' and (float(row[2]) == 0.0 or math.isnan(float(row[2])))}
        return queriesWithNan

    def fetchValues(run):
        """Map query -> value of ``args.metric`` for *run*, skipping NaN values."""
        tsv = read_ssv(run)
        data = {row[0]: float(row[2]) for row in tsv if row[1] == args.metric and not math.isnan(float(row[2]))}
        return data

    args = parser.parse_args()

    pairedt = pairedttest.pairedt(best=True, format=args.format, metric=args.metric, runs=args.runs)
    print("paired t")
    print(pairedt)
    print("=-----=")

    numQueries_key = "num_q"
    print("column.py metric="+args.metric+" out="+args.out)

    datas = {run: fetchValues(run) for run in args.runs}

    # deal with nans: drop the aggregate 'all' row plus every query flagged
    # by any run, and take the query universe from the first run.
    queriesWithNanValues = {'all'}.union(*[findQueriesWithNanValues(run) for run in args.runs])
    basedata = datas[args.runs[0]]
    queries = set(basedata.keys()).difference(queriesWithNanValues)
    numQueries = readNumQueries(args.runs[0]) if args.c else len(queries)

    seriesDict = {'mean': dict(), 'stderr': dict()}

    for run in datas:
        data = datas[run]

        if any(key not in data for key in queries):
            print("data for run "+run+" does not contain all queries "+" ".join(queries))

        # Queries missing from a run count as 0.0; when numQueries exceeds
        # the retained query count, the std is padded with zeros to match.
        mean = np.sum([data.get(key, 0.0) for key in queries]) / numQueries
        stderr = np.std([data.get(key, 0.0) for key in queries] + ([0.0] * (numQueries - len(queries)))) / sqrt(numQueries)
        seriesDict['mean'][run] = mean
        seriesDict['stderr'][run] = stderr

    print("dropping queries because of NaN values: " + " ".join(queriesWithNanValues))

    print('\t'.join(['run', 'mean/stderr']))
    for run in datas:
        print('\t'.join([run, str(seriesDict['mean'][run]), str(seriesDict['stderr'][run])]))

    df1 = DataFrame(seriesDict, index=pd.Index(args.runs))
    if args.sort:
        df1.sort_values('mean', ascending=False, inplace=True)
    df2 = df1['mean']
    # Label bars by file name only rather than by full run path.
    df2.index = [os.path.basename(label) for label in df1.index]
    df1.index = [os.path.basename(label) for label in df1.index]

    print('df2.index=', df2.index)

    # pairedt[label][1] is compared against 0.05 -- presumably a p-value;
    # NaN or > 0.05 marks the run with '**'.  TODO confirm against pairedttest.
    df2.text = ['**' if (math.isnan(pairedt[label][1]) or pairedt[label][1] > 0.05) else '' for label in df2.index]
    # Largest bar position satisfying the same condition (0 when none does).
    min_same_idx = max([i if (math.isnan(pairedt[label][1]) or pairedt[label][1] > 0.05) else 0 for i, label in enumerate(df2.index)])

    # One shade per distinct 3-character label prefix, cycling through the
    # list (matplotlib reads such numeric strings as grey levels).
    cs = {k: v for k, v in zip(sorted(list(set([label[0:3] for label in df1.index]))), itertools.cycle(['0.1', '0.9', '0.5', '0.3', '0.7', '0.2', '0.8', '0.4', '0.6']))}
    df1['color'] = [cs[label[0:3]] for label in df1.index]
    print(df1['color'])
    # NOTE(review): this runs before plt.subplots(), so it applies to whatever
    # axes pyplot treats as current rather than to `ax` -- confirm intent.
    plt.tick_params(colors=df1.color)
    fig, ax = plt.subplots()

    df2.plot.bar(yerr=df1['stderr'], color=df1.color.values, ax=ax)

    # Only the first 100 patches are considered.
    for (p, i) in zip(ax.patches, range(100)):
        if args.sort:
            if i == min_same_idx:
                # Double-headed arrow at half this bar's height, spanning
                # from its right edge back to just left of x = 0.
                frompoint = (p.get_x()+p.get_width(), p.get_height()/2.0)
                topoint = (0.0-p.get_width()/2.0, p.get_height()/2.0)
                ax.annotate("",
                            xy=topoint, xycoords='data',
                            xytext=frompoint, textcoords='data',
                            arrowprops=dict(arrowstyle="<|-|>",
                                            connectionstyle="arc3", ec='r'),
                            )
        else:
            ax.annotate(df2.text[i], xy=(p.get_x() + p.get_width() / 2.0, p.get_height()*0.9), ha='center', va='center',)

    ax.grid()
    plt.ylabel(args.metric, fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=20)
    plt.xticks(rotation=90)
    plt.savefig(args.out, bbox_inches='tight')