def parse_sw_history2(begin_date='2014-03-12', end_date=None, code='801150'):
    """Download daily valuation history for one SW (申万) index.

    Pages through www.swsindex.com's handler endpoint and assembles the
    rows into a cleaned DataFrame.

    Parameters
    ----------
    begin_date : str
        Inclusive start date, 'YYYY-MM-DD'.
    end_date : str or None
        Inclusive end date; defaults to today's date.
    code : str
        SW index code, e.g. '801150'.

    Returns
    -------
    pandas.DataFrame or None
        Frame with float PE/PB and datetime BargainDate; None when the
        response carries no 'PE' column.
    """
    if end_date is None:
        end_date = str(arrow.now().date())
    condition = 'swindexcode=\'{}\' and BargainDate>=\'{}\' and BargainDate<=\'{}\' and type=\'Day\''
    where = condition.format(code, begin_date, end_date)
    url = 'http://www.swsindex.com/handler.aspx'
    all_data = []
    # Page through the result set; stop when the server returns no rows.
    for page in range(1, 1000):
        payload = {'tablename': 'V_Report',
                   'key': 'id',
                   'p': page,
                   'where': where,
                   'orderby': 'swindexcode asc,BargainDate_1',
                   'fieldlist': 'SwIndexCode,SwIndexName,BargainDate,CloseIndex,BargainAmount,Markup,'
                                'TurnoverRate,PE,PB,MeanPrice,BargainSumRate,DP',
                   'pagecount': 993,
                   'timed': 1456667319778
                   }
        res = requests.post(url, data=payload)
        # The endpoint answers with single-quoted pseudo-JSON.
        data = res.text.replace('\'', '\"')
        result = json.loads(data)
        # BUGFIX: 'root' may be absent (None); the old code crashed on len(None).
        data_list = result.get('root') or []
        if not data_list:
            break
        all_data.extend(data_list)
    df = DataFrame(all_data)
    if 'PE' not in df:
        return
    # Drop rows whose PE/PB came back as empty strings before converting.
    df = df[df['PE'] != '']
    df = df[df['PB'] != '']
    # Convert string columns to proper dtypes.
    df['BargainDate'] = pd.to_datetime(df['BargainDate'])
    df[['PE', 'PB']] = df[['PE', 'PB']].astype(float)
    return df
def parse_sw_history2(begin_date='2014-03-12', end_date=None, code='801150'):
    """Download daily valuation history for one SW (申万) index.

    Variant that requests one row per page ('pagecount': 1) from
    www.swsindex.com's handler endpoint.

    Parameters
    ----------
    begin_date : str
        Inclusive start date, 'YYYY-MM-DD'.
    end_date : str or None
        Inclusive end date; defaults to today's date.
    code : str
        SW index code, e.g. '801150'.

    Returns
    -------
    pandas.DataFrame or None
        Frame with float PE/PB and datetime BargainDate; None when the
        response carries no 'PE' column.
    """
    if end_date is None:
        end_date = str(arrow.now().date())
    condition = 'swindexcode=\'{}\' and BargainDate>=\'{}\' and BargainDate<=\'{}\' and type=\'Day\''
    where = condition.format(code, begin_date, end_date)
    url = 'http://www.swsindex.com/handler.aspx'
    all_data = []
    for page in range(1, 1000):
        payload = {'tablename': 'V_Report',
                   'key': 'id',
                   'p': page,
                   'where': where,
                   'orderby': 'swindexcode asc,BargainDate_1',
                   'fieldlist': 'SwIndexCode,SwIndexName,BargainDate,CloseIndex,BargainAmount,Markup,'
                                'TurnoverRate,PE,PB,MeanPrice,BargainSumRate,DP',
                   'pagecount': 1,
                   'timed': 1456667319778
                   }
        res = requests.post(url, data=payload)
        # The endpoint answers with single-quoted pseudo-JSON.
        data = res.text.replace('\'', '\"')
        result = json.loads(data)
        # BUGFIX: 'root' may be absent (None); the old code crashed on len(None).
        data_list = result.get('root') or []
        if not data_list:
            break
        all_data.extend(data_list)
    df = DataFrame(all_data)
    # BUGFIX: guard against a missing 'PE' column (consistent with the
    # sibling implementation); without it the filter below raises KeyError
    # when the server returned nothing usable.
    if 'PE' not in df:
        return
    df = df[df['PE'] != '']
    df = df[df['PB'] != '']
    df['BargainDate'] = pd.to_datetime(df['BargainDate'])
    df[['PE', 'PB']] = df[['PE', 'PB']].astype(float)
    return df
def ah_premium_index(samples=[('600036', '03968'), ('600196', '02196'), ('601111', '00753')]):
    """Compute an A/H premium index over a basket of dual-listed stocks.

    NOTE: the ``samples`` parameter is immediately overwritten by the full
    hard-coded pair list below; it is kept only so the signature stays
    backward compatible.

    Returns
    -------
    AhIndex
        Wrapper around the mean A-price / H-price ratio across the basket.
    """
    # (A-share code, H-share code) pairs.
    samples = [('600585', '00914'), ('601318', '02318'), ('000002', '02202'), ('600036', '03968'),
               ('600600', '00168'), ('600196', '02196'), ('600030', '06030'), ('600028', '00386'),
               ('601601', '02601'), ('601628', '02628'), ('000063', '00763'), ('601398', '01398'),
               ('601939', '00939'), ('601288', '01288'), ('600837', '06837'), ('601607', '02607'),
               ('600011', '00902'), ('002202', '02208'), ('601988', '03988'), ('601818', '06818'),
               ('601336', '01336'), ('600027', '01071'), ('601088', '01088'), ('601328', '03328'),
               ('600016', '01988'), ('601998', '00998'), ('601186', '01186'), ('600332', '00874'),
               ('601766', '01766'), ('002594', '01211'), ('601857', '00857'), ('000039', '02039'),
               ('600362', '00358'), ('600012', '00995'), ('601633', '02333'), ('601800', '01800'),
               ('601333', '00525'), ('601111', '00753'), ('600875', '01072'), ('601390', '00390'),
               ('601898', '01898'), ('601899', '02899'), ('000898', '00347'), ('000157', '01157'),
               ('600685', '00317'), ('601992', '02009'), ('601600', '02600'), ('601991', '00991'),
               ('600115', '00670'), ('601808', '02883'), ('600871', '01033'), ('601727', '02727'),
               ('600188', '01171'), ('601238', '02238'), ('601919', '01919'), ('601866', '02866'),
               ('601618', '01618'), ('600026', '01138'), ('601880', '02880'), ('600874', '01065'),
               ('600660', '03606'), ('600377', '00177'), ('000776', '01776'), ('601688', '06886'),
               ('000338', '02338'), ('600029', '01055'), ('603993', '03993'), ('601005', '01053'),
               ('600688', '00338'), ('600548', '00548'), ('002672', '00895'), ('000513', '01513'),
               ('000488', '01812'), ('601107', '00107'), ('601588', '00588'), ('600808', '00323'),
               ('000921', '00921'), ('600775', '00553'), ('600860', '00187'), ('000756', '00719'),
               ('601038', '00038'), ('600806', '00300'), ('002490', '00568'), ('002703', '01057'),
               ('600876', '01108'), ('601717', '00564'), ('000585', '00042')]
    a_list = []
    h_list = []
    price_a_list = []
    price_h_list = []
    ratio_list = []
    # NOTE(review): rate appears to be quoted per 100 RMB, hence /100 — confirm
    # against rmb_exchange_rate().
    hk_to_rmb = float(rmb_exchange_rate()[0]) / 100
    for sample in samples:
        ratio = ah_ratio(hk_to_rmb, sample)
        if ratio:
            a_list.append(sample[0])
            h_list.append(sample[1])
            price_a_list.append(ratio.get('price_a'))
            price_h_list.append(ratio.get('price_h'))
            ratio_list.append(ratio.get('ratio'))
    df_dict = {'A': a_list, 'Price_A': price_a_list,
               'H': h_list, 'Price_H': price_h_list,
               'ratio': ratio_list}
    df = DataFrame(df_dict)
    # BUGFIX: DataFrame.sort(columns=...) was removed in pandas 0.20;
    # sort_values is the supported replacement.
    df = df.sort_values(by='ratio', ascending=True)
    ah_index = df['ratio'].mean()
    return AhIndex(ah_index)
def ah_premium_index(samples=[('600036', '03968'), ('600196', '02196'), ('601111', '00753')]):
    """Compute an A/H premium index over a basket of dual-listed stocks
    (verbose variant that prints intermediate results).

    NOTE: the ``samples`` parameter is immediately overwritten by the full
    hard-coded pair list below; it is kept only so the signature stays
    backward compatible.

    Returns
    -------
    AhIndex
        Wrapper around the mean A-price / H-price ratio across the basket.
    """
    # (A-share code, H-share code) pairs.
    samples = [('600585', '00914'), ('601318', '02318'), ('000002', '02202'), ('600036', '03968'),
               ('600600', '00168'), ('600196', '02196'), ('600030', '06030'), ('600028', '00386'),
               ('601601', '02601'), ('601628', '02628'), ('000063', '00763'), ('601398', '01398'),
               ('601939', '00939'), ('601288', '01288'), ('600837', '06837'), ('601607', '02607'),
               ('600011', '00902'), ('002202', '02208'), ('601988', '03988'), ('601818', '06818'),
               ('601336', '01336'), ('600027', '01071'), ('601088', '01088'), ('601328', '03328'),
               ('600016', '01988'), ('601998', '00998'), ('601186', '01186'), ('600332', '00874'),
               ('601766', '01766'), ('002594', '01211'), ('601857', '00857'), ('000039', '02039'),
               ('600362', '00358'), ('600012', '00995'), ('601633', '02333'), ('601800', '01800'),
               ('601333', '00525'), ('601111', '00753'), ('600875', '01072'), ('601390', '00390'),
               ('601898', '01898'), ('601899', '02899'), ('000898', '00347'), ('000157', '01157'),
               ('600685', '00317'), ('601992', '02009'), ('601600', '02600'), ('601991', '00991'),
               ('600115', '00670'), ('601808', '02883'), ('600871', '01033'), ('601727', '02727'),
               ('600188', '01171'), ('601238', '02238'), ('601919', '01919'), ('601866', '02866'),
               ('601618', '01618'), ('600026', '01138'), ('601880', '02880'), ('600874', '01065'),
               ('600660', '03606'), ('600377', '00177'), ('000776', '01776'), ('601688', '06886'),
               ('000338', '02338'), ('600029', '01055'), ('603993', '03993'), ('601005', '01053'),
               ('600688', '00338'), ('600548', '00548'), ('002672', '00895'), ('000513', '01513'),
               ('000488', '01812'), ('601107', '00107'), ('601588', '00588'), ('600808', '00323'),
               ('000921', '00921'), ('600775', '00553'), ('600860', '00187'), ('000756', '00719'),
               ('601038', '00038'), ('600806', '00300'), ('002490', '00568'), ('002703', '01057'),
               ('600876', '01108'), ('601717', '00564'), ('000585', '00042')]
    a_list = []
    h_list = []
    price_a_list = []
    price_h_list = []
    ratio_list = []
    # NOTE(review): rate appears to be quoted per 100 RMB, hence /100 — confirm
    # against rmb_exchange_rate().
    hk_to_rmb = float(rmb_exchange_rate()[0]) / 100
    for sample in samples:
        ratio = ah_ratio(hk_to_rmb, sample)
        if ratio:
            a_list.append(sample[0])
            h_list.append(sample[1])
            price_a_list.append(ratio.get('price_a'))
            price_h_list.append(ratio.get('price_h'))
            ratio_list.append(ratio.get('ratio'))
    df_dict = {'A': a_list, 'Price_A': price_a_list,
               'H': h_list, 'Price_H': price_h_list,
               'ratio': ratio_list}
    # BUGFIX: converted Python 2 print statements to print() calls — the
    # rest of the file already uses the function form.
    print(df_dict)
    df = DataFrame(df_dict)
    # BUGFIX: DataFrame.sort(columns=...) was removed in pandas 0.20;
    # sort_values is the supported replacement.
    df = df.sort_values(by='ratio', ascending=True)
    print(df)
    ah_index = df['ratio'].mean()
    print('ah_index:{}'.format(ah_index))
    print('discount stock:{}'.format(df[df.ratio < 1]))
    return AhIndex(ah_index)
def parse_sw_history(begin_date='2014-03-12', end_date=None, codes=None):
    """Download daily history for a set of SW industry indexes (verbose
    variant that prints progress and summary statistics).

    Parameters
    ----------
    begin_date : str
        Inclusive start date, 'YYYY-MM-DD'.
    end_date : str or None
        Inclusive end date; defaults to today's date.
    codes : tuple or None
        SW index codes; defaults to the full level-1 industry list.

    Returns
    -------
    pandas.DataFrame
        All fetched rows with float PE/PB, sorted by PB ascending.
    """
    if end_date is None:
        end_date = str(arrow.now().date())
    if codes is None:
        codes = ('801010', '801020', '801030', '801040', '801050', '801060', '801070',
                 '801080', '801090', '801100', '801110', '801120', '801130', '801140',
                 '801150', '801160', '801170', '801180', '801190', '801200', '801210',
                 '801220', '801230', '801710', '801720', '801730', '801740', '801750',
                 '801760', '801770', '801780', '801790', '801880', '801890')
    condition = 'swindexcode in {} and BargainDate>=\'{}\' and BargainDate<=\'{}\''
    where = condition.format(codes, begin_date, end_date)
    # BUGFIX: Python 2 print statements converted to print() calls — the
    # rest of the file already uses the function form.
    print(where)
    url = 'http://www.swsindex.com/handler.aspx'
    all_data = []
    for page in range(1, 1000):
        payload = {'tablename': 'swindexhistory',
                   'key': 'id',
                   'p': page,
                   'where': where,
                   'orderby': 'swindexcode asc,BargainDate_1',
                   'fieldlist': 'SwIndexCode,SwIndexName,BargainDate,CloseIndex,BargainAmount,Markup,'
                                'TurnoverRate,PE,PB,MeanPrice,BargainSumRate,DP',
                   'pagecount': 28,
                   'timed': 1453385628267
                   }
        res = requests.post(url, data=payload)
        # The endpoint answers with single-quoted pseudo-JSON.
        data = res.text.replace('\'', '\"')
        result = json.loads(data)
        # BUGFIX: 'root' may be absent (None); the old code crashed on len(None).
        data_list = result.get('root') or []
        print('url****' + url)
        print(len(data_list))
        if not data_list:
            break
        all_data.extend(data_list)
    df = DataFrame(all_data)
    df[['PE', 'PB']] = df[['PE', 'PB']].astype(float)
    print('*' * 20)
    print(len(df))
    print(df)
    # BUGFIX: DataFrame.sort(columns=...) was removed in pandas 0.20.
    df = df.sort_values(by='PE', ascending=True)
    print(df)
    df = df.sort_values(by='PB', ascending=True)
    print(df)
    print('PE mean:{}'.format(df['PE'].mean()))
    print('PB mean:{}'.format(df['PB'].mean()))
    print('PB<1:{}'.format(df[df.PB < 1]))
    return df
def parse_sw_history(begin_date='2014-03-12', end_date=None, codes=None):
    """Download daily history for a set of SW industry indexes.

    Parameters
    ----------
    begin_date : str
        Inclusive start date, 'YYYY-MM-DD'.
    end_date : str or None
        Inclusive end date; defaults to today's date.
    codes : tuple or None
        SW index codes; defaults to the full level-1 industry list.

    Returns
    -------
    pandas.DataFrame
        All fetched rows with float PE/PB, sorted by PB ascending.
    """
    if end_date is None:
        end_date = str(arrow.now().date())
    if codes is None:
        codes = ('801010', '801020', '801030', '801040', '801050', '801060', '801070',
                 '801080', '801090', '801100', '801110', '801120', '801130', '801140',
                 '801150', '801160', '801170', '801180', '801190', '801200', '801210',
                 '801220', '801230', '801710', '801720', '801730', '801740', '801750',
                 '801760', '801770', '801780', '801790', '801880', '801890')
    condition = 'swindexcode in {} and BargainDate>=\'{}\' and BargainDate<=\'{}\''
    where = condition.format(codes, begin_date, end_date)
    url = 'http://www.swsindex.com/handler.aspx'
    all_data = []
    for page in range(1, 1000):
        payload = {'tablename': 'swindexhistory',
                   'key': 'id',
                   'p': page,
                   'where': where,
                   'orderby': 'swindexcode asc,BargainDate_1',
                   'fieldlist': 'SwIndexCode,SwIndexName,BargainDate,CloseIndex,BargainAmount,Markup,'
                                'TurnoverRate,PE,PB,MeanPrice,BargainSumRate,DP',
                   'pagecount': 28,
                   'timed': 1453385628267
                   }
        res = requests.post(url, data=payload)
        # The endpoint answers with single-quoted pseudo-JSON.
        data = res.text.replace('\'', '\"')
        result = json.loads(data)
        # BUGFIX: 'root' may be absent (None); the old code crashed on len(None).
        data_list = result.get('root') or []
        if not data_list:
            break
        all_data.extend(data_list)
    df = DataFrame(all_data)
    df[['PE', 'PB']] = df[['PE', 'PB']].astype(float)
    # BUGFIX: DataFrame.sort(columns=...) was removed in pandas 0.20;
    # sort_values is the supported replacement (last sort by PB wins).
    df = df.sort_values(by='PE', ascending=True)
    df = df.sort_values(by='PB', ascending=True)
    return df
def xueqiu_history(code='600036', access_token=xq_a_token, begin_date=None, end_date=None):
    """Fetch daily K-line history from Xueqiu and wrap rows as StockHistory.

    Parameters
    ----------
    code : str
        Bare A-share/HK/fund code, or an 8-char already-prefixed symbol.
    access_token : str
        Xueqiu API token.
    begin_date, end_date : int or None
        Epoch-millisecond bounds; default to 2014-01-01 and now.

    Returns
    -------
    list of StockHistory
    """
    if begin_date is None:
        begin_date = arrow.get('2014-01-01').timestamp * 1000
    if end_date is None:
        end_date = arrow.now().timestamp * 1000
    # Normalise the symbol to Xueqiu's exchange-prefixed form.
    if len(code) == 8:
        pass  # already prefixed, e.g. 'SH600036'
    elif code.startswith('60') or code.startswith('51'):
        code = 'SH' + code
    elif len(code) == 5:
        code = 'HK' + code
    elif len(code) == 6:
        code = 'SZ' + code
    url = ('http://xueqiu.com/stock/forchartk/stocklist.json?'
           'symbol={}&period=1day&type=normal&begin={}&end={}&_=1443694358741')
    url = url.format(code, begin_date, end_date)
    payload = {'access_token': access_token}
    r = requests.get(url, params=payload, headers=headers)
    data_list = r.json().get('chartlist')
    result = []
    for data in data_list:
        # Xueqiu reports bar times like 'Mon Jan 05 00:00:00 +0800 2015'.
        bar_time = arrow.get(data.get('time'), 'ddd MMM DD HH:mm:ss Z YYYY')
        history = StockHistory(code=code, percent=data.get('percent'),
                               ma5=data.get('ma5'), ma10=data.get('ma10'), ma30=data.get('ma30'),
                               open_price=data.get('open'), high=data.get('high'), low=data.get('low'),
                               close=data.get('close'), time=bar_time.datetime,
                               timestamp=bar_time.timestamp * 1000,
                               volume=data.get('volume'),
                               # NOTE: indexes have no turnover rate
                               turn_rate=data.get('turnrate'))
        result.append(history)
    # Cleanup: the old code also built a DataFrame and min/max/mean
    # turnover & volume statistics that were never used; removed.
    return result
def read_index2(code='000905'):
    """Import CSIndex valuation history for one index into the Index collection.

    Downloads the official xls performance file for ``code``, walks every
    sheet/row, and upserts Index documents keyed on (name, date).

    Parameters
    ----------
    code : str
        One of '000300' (沪深300), '000905' (中证500), '000016' (上证50).

    Raises
    ------
    ValueError
        If ``code`` is not one of the supported indexes.
    """
    names = {'000300': '沪深300', '000905': '中证500', '000016': '上证50'}
    # BUGFIX: unknown codes used to fall through the if/elif chain and hit
    # an unbound `name` (NameError) on the first data row; fail fast instead.
    if code not in names:
        raise ValueError('unsupported index code: {}'.format(code))
    name = names[code]
    url = 'http://www.csindex.com.cn/uploads/file/autofile/perf/{}perf.xls'.format(
        code)
    book = get_excel_book(url)
    for sheet in range(book.nsheets):
        sh = book.sheet_by_index(sheet)
        for rx in range(sh.nrows):
            row = sh.row(rx)
            # Data rows carry: 0=date, 13=turnover, 15/16=PE (static / TTM),
            # 17=dividend yield. Shorter rows are headers/footers.
            if len(row) > 15:
                date = row[0].value
                pe1 = row[15].value
                pe2 = row[16].value
                dividend_yield_ratio1 = row[17].value
                turnover = row[13].value
                # Real dates arrive as xls serial floats; header rows carry
                # strings, so the isinstance check skips them.
                if date and pe1 and isinstance(pe1, float):
                    py_date = xlrd.xldate.xldate_as_datetime(
                        date, book.datemode)
                    date = str(py_date)
                    Index.objects(name=name, date=date).update_one(
                        name=name, date=date, pe=pe1, pe_ttm=pe2,
                        dividend_yield_ratio=dividend_yield_ratio1,
                        turnover=turnover, upsert=True)
# Fragment of a crime-classification script: builds one 0/1 indicator
# column per crime type from the predicted labels and writes them to csv.
# NOTE(review): `type` and `value` come from a loop preceding this chunk
# (not visible here); `type` also shadows the builtin.
crime_type_dict[type]=value
for predicted in y_pred:
    for key in crime_type_dict.keys():
        if key!=predicted:
            # append 0 for every class that was not the prediction...
            zero_append = crime_type_dict[key]
            zero_append.append(0)
            crime_type_dict[key] = zero_append
        else:
            # ...and 1 for the predicted class
            one_append = crime_type_dict[key]
            one_append.append(1)
            crime_type_dict[key] = one_append
# One row per prediction, one column per crime type.
output = DataFrame(crime_type_dict)
#output.index += 1
output.to_csv('output_predict.csv',sep=',',index_label='Id')
#print("Number of mislabeled points out of a total %d points : %d" % (X.shape[0],(labels != y_pred).sum()))
#s = Series(file_header)
#correlation(training_data,labels)
# for index, row in data_frame.iterrows():
#     list_district.append(get_district_mapping(row['PdDistrict']))
#     list_category.append(get_category_mapping(row['Category']))
#
# print 'Number of Districts',len(list_district)
# print 'Number of Crimes',len(list_category)
#
# colors = cm.rainbow(np.linspace(0,1,len(list_district)))
#
# Plot mean inside/outside readings over ~300 overlapping time windows,
# with a shaded mean +/- std tube for each series.
begin = blairInside['time'][0]
end = blairInside['time'][-1]
duration = end - begin
steps = 300
# Overlapping windows, each two steps wide.
slices = [(begin + duration * step / steps, begin + duration * (step + 2) / steps)
          for step in range(0, steps - 2)]
# BUGFIX: the label of each window is its midpoint,
# winBegin + (winEnd - winBegin) / 2; the old expression used
# (begin - end) / 2 with the shadowed slice bounds, which placed every
# date one full window-width BEFORE the window instead of centred in it.
slicedDates = [winBegin + (winEnd - winBegin) / 2
               for (winBegin, winEnd) in slices]  # re-center date in middle of avg window
idx = pandas.to_datetime(slicedDates, unit='s', utc=True)
df1 = DataFrame(
    {
        'inside': mean_std(blairInside, slices)[0],
        'outside': mean_std(blairOutside, slices)[0]
    },
    index=idx,
    columns=['inside', 'outside'])
df1.plot(kind='line')
plt.gca().xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%H:%M', tz=timezone("America/New_York")))


def fill_error_tube(b, color):
    """Shade mean +/- one std of series `b` over the shared windows."""
    (mean, error) = mean_std(b, slices)
    plt.fill_between(df1.index, mean - error, mean + error, color=color)


fill_error_tube(blairInside, [0.5, 0.5, 0.5, 0.5])
fill_error_tube(blairOutside, [0.5, 0.5, 0.5, 0.5])
print("feature-column.py metric=" + args.metric + " out=" + args.out)


def read_ssv(fname):
    """Read a whitespace-separated eval file into rows.

    trec_eval rows arrive as (metric, qid, value); they are reordered to
    galago_eval's (qid, metric, value) shape.
    """
    # BUGFIX: use a context manager instead of leaking the file handle.
    with open(fname, 'r') as f:
        lines = [line.split() for line in f]
    if args.format.lower() == 'galago_eval':
        return lines
    elif args.format.lower() == 'trec_eval':
        return [[line[1], line[0]] + line[2:] for line in lines]


namestsv = read_ssv(args.names)
namesDict = {row[0]: row[2][8:] for row in namestsv}
for run in args.runs:
    # BUGFIX: each run file was read and parsed twice; one pass suffices
    # since both comprehensions filter the same rows.
    tsv = read_ssv(run)
    values = [float(row[2]) for row in tsv if row[0] in namesDict]
    labels = [namesDict[row[0]] for row in tsv if row[0] in namesDict]
    # One bar per named query, one chart per run.
    df2 = DataFrame(values, index=labels, columns=[os.path.basename(run)])
    plt.figure()
    df2.plot(kind='bar', color=['1.0', '0.70', '0.0', '0.50'])
    plt.ylabel(args.metric, fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.xticks(rotation=90)
    plt.savefig(args.out + os.path.basename(run) + '.pdf', bbox_inches='tight')
    # plt.show()
def plot_output(name, infile_path, model_names, filter):
    """
    Reads predictions from csv files and generates plots and output csv.

    Input csv files should be in the infile_path with following structure:

    ``infile_path`` /
        ../any_name/
            ../config.csv, test_.csv,train_.csv
        ../any_name2
            ../config.csv, test_.csv,train_.csv

    The function also exports the data used to generate graphs as csv files the following folder:
        ../graph_data
    these csv files can be used to reproduce outputs.

    Parameters
    ----------
    name : string
        name of the csv files to which data will be exported

    infile_path : string
        the folder which contains csv for configs and test and train

    model_names : list
        name of the sub-directories in ``infile_path`` to consider

    filter : callable
        a filter which will be applied in config files to filter which configs should be considered.
        For example, lambda x: x['method'] == 'full' will only consider outputs which used 'full' method
    """
    # NOTE(review): Python 2 code (print statements, dict.iteritems).
    # `filter` shadows the builtin but is part of the public signature.
    # One accumulator dict per metric; keys are config strings, values lists.
    graphs = {
        'SSE': {},
        'MSSE': {},
        'NLPD': {},
        'ER': {},
        'intensity': {},
    }
    # graph_n counts how many model runs contributed to each config string.
    graph_n = {}
    for m in model_names:
        data_config = PlotOutput.read_config(infile_path + m + '/' + model_logging.CONFIG_FILE_NAME)
        if filter is None or filter(data_config):
            data_test = pandas.read_csv(infile_path + m + '/' + model_logging.PREDICTIONS_FILE_NAME)
            cols = data_test.columns
            # Output dimensionality = number of true_Y* columns.
            dim = 0
            for element in cols:
                if element.startswith('true_Y'):
                    dim += 1
            data_train = pandas.read_csv(infile_path + m + '/' + model_logging.TRAINING_FILE_NAME)
            Y_mean = data_train['Y_0'].mean()
            Ypred = np.array([data_test['predicted_Y_%d' % (d)] for d in range(dim)])
            Ytrue = np.array([data_test['true_Y_%d' % (d)] for d in range(dim)])
            Yvar = np.array([data_test['predicted_variance_%d' % (d)] for d in range(dim)])
            if not (PlotOutput.config_to_str(data_config) in graph_n.keys()):
                graph_n[PlotOutput.config_to_str(data_config)] = 0
            graph_n[PlotOutput.config_to_str(data_config)] += 1
            # Which metrics apply depends on the likelihood ('ll') used.
            if data_config['ll'] in [CogLL.__name__]:
                # Multi-output case: MSSE and NLPD per output dimension.
                for i in range(Ytrue.shape[0]):
                    Y_mean = data_train['Y_' + str(i)].mean()
                    PlotOutput.add_to_list(graphs['MSSE'], PlotOutput.config_to_str(data_config) + '_' + str(i),
                                           ((Ypred[i] - Ytrue[i])**2).mean() / ((Y_mean - Ytrue[i]) ** 2).mean())
                    NLPD = np.array(data_test['NLPD_' + str(i)])
                    PlotOutput.add_to_list(graphs['NLPD'], PlotOutput.config_to_str(data_config) + '_' + str(i),
                                           NLPD)
            if data_config['ll'] in [UnivariateGaussian.__name__, WarpLL.__name__]:
                # Regression: standardized squared error and NLPD.
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(graphs['SSE'], PlotOutput.config_to_str(data_config),
                                       (Ypred[0] - Ytrue[0])**2 / ((Y_mean - Ytrue[0]) **2).mean())
                PlotOutput.add_to_list(graphs['NLPD'], PlotOutput.config_to_str(data_config), NLPD)
            if data_config['ll'] in [LogisticLL.__name__]:
                # Binary classification: error rate at threshold 0.5 (labels are +/-1).
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(graphs['ER'], PlotOutput.config_to_str(data_config),
                                       np.array([(((Ypred[0] > 0.5) & (Ytrue[0] == -1)) |
                                                  ((Ypred[0] < 0.5) & (Ytrue[0] == 1))
                                                  ).mean()]))
                PlotOutput.add_to_list(graphs['NLPD'], PlotOutput.config_to_str(data_config), NLPD)
            if data_config['ll'] in [SoftmaxLL.__name__]:
                # Multi-class: error rate via argmax over output dims.
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(graphs['ER'], PlotOutput.config_to_str(data_config),
                                       np.array([(np.argmax(Ytrue, axis=0) != np.argmax(Ypred, axis=0)).mean()]))
                PlotOutput.add_to_list(graphs['NLPD'], PlotOutput.config_to_str(data_config), NLPD)
            if data_config['ll'] in [LogGaussianCox.__name__]:
                # Point process: store [x (rescaled to years), mean, var, true].
                # NOTE(review): 1851.2026 offset presumably maps X to calendar
                # years for this dataset — confirm against the data source.
                X0 = np.array([data_test['X_0']])
                PlotOutput.add_to_list(graphs['intensity'], PlotOutput.config_to_str(data_config),
                                       np.array([X0[0,:]/365+1851.2026, Ypred[0, :], Yvar[0, :], Ytrue[0, :]]).T)
    # Render one figure per non-empty metric and dump the plotted data to csv.
    for n, g in graphs.iteritems():
        if g:
            ion()
            for k in g.keys():
                if k in graph_n.keys():
                    print k, 'n: ', graph_n[k]
            if n in ['SSE', 'NLPD']:
                # Box plot; Series() pads ragged lists with NaN.
                g= DataFrame(dict([(k,Series(v)) for k,v in g.iteritems()]))
                ax = g.plot(kind='box', title=n)
                check_dir_exists('../graph_data/')
                g.to_csv('../graph_data/' + name + '_' + n + '_data.csv', index=False)
            if n in ['ER', 'MSSE']:
                # Bar plot of means with std error bars.
                g= DataFrame(dict([(k,Series(v)) for k,v in g.iteritems()]))
                check_dir_exists('../graph_data/')
                g.to_csv('../graph_data/' + name + '_' + n + '_data.csv', index=False)
                m = g.mean()
                errors = g.std()
                ax =m.plot(kind='bar', yerr=errors, title=n)
                patches, labels = ax.get_legend_handles_labels()
                ax.legend(patches, labels, loc='lower center')
            if n in ['intensity']:
                # Export true intensities once, then one block per model config.
                X = g.values()[0][:, 0]
                true_data = DataFrame({'x': X, 'y': g.values()[0][:, 3]})
                true_data.to_csv('../graph_data/' + name + '_' + 'true_y' + '_data.csv', index=False)
                plt.figure()
                color = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
                c = 0
                check_dir_exists('../graph_data/')
                graph_data = DataFrame()
                for k,v in g.iteritems():
                    # plt.plot(X, v[:, 1], hold=True, color=color[c], label=k)
                    # plt.fill_between(X, v[:, 1] - 2 * np.sqrt(v[:, 2]), v[:, 1] + 2 * np.sqrt(v[:, 2]), alpha=0.2, facecolor=color[c])
                    graph_data = graph_data.append(DataFrame({'x': X, 'm' : v[:, 1], 'v' :v[:, 2],
                                                              'model_sp' :[k] * X.shape[0]} ))
                    c += 1
                plt.legend(loc='upper center')
                graph_data.to_csv('../graph_data/' + name + '_' + n + '_data.csv', index=False)
    show(block=True)
# Train a 2-class skflow DNN on the Titanic training features, report the
# training-set accuracy, then apply the same feature engineering to the
# Kaggle test set and write the submission to result.csv.
random.seed(42)
classifier = skflow.TensorFlowEstimator(model_fn=dnn_tanh, n_classes=2,
    batch_size=128, steps=2000, learning_rate=0.02)
classifier.fit(X=x_data, y=y_data)
# NOTE(review): accuracy is measured on the training data itself.
score = metrics.accuracy_score(y_data, classifier.predict(x_data))
print("Accuracy: %f" % score) #0.823793
test_data = pd.read_csv('../resources/test_titanic.csv',header=0)
x_data2 = test_data[['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']]
# Encode categoricals and fill missing values (must mirror the training
# pre-processing for the model to be applicable).
x_data2.Sex = x_data2.Sex.map( {'female': 1, 'male': 0} )
# NOTE(review): missing ages are filled with ONE random value shared by
# all rows (randint is evaluated once) — likely unintended; verify.
x_data2.Age.fillna(value=random.randint(1, 100),inplace=True)
x_data2.Fare.fillna(value=x_data2.Fare.mean(),inplace=True)
x_data2.Embarked.fillna('C',inplace=True)
x_data2.Embarked = x_data2.Embarked.map( {'C': 1, 'S': 2, 'Q':3, '':4 } )
# Log-transformed copies of each feature (+1 where zeros can occur).
x_data2['Pclass_Log'] = x_data2['Pclass'].map(lambda x: math.log(x))
x_data2['Sex_Log'] = x_data2['Sex'].map(lambda x: math.log(x+1))
x_data2['Age_Log'] = x_data2['Age'].map(lambda x: math.log(x))
x_data2['SibSp_Log'] = x_data2['SibSp'].map(lambda x: math.log(x+1))
x_data2['Parch_Log'] = x_data2['Parch'].map(lambda x: math.log(x+1))
x_data2['Fare_Log'] = x_data2['Fare'].map(lambda x: math.log(x+1))
x_data2['Embarked_Log'] = x_data2['Embarked'].map(lambda x: math.log(x))
test_predict = classifier.predict(x_data2)
test_id = test_data['PassengerId']
data = {'PassengerId':test_data['PassengerId'],'Survived':test_predict}
result = DataFrame(data)
result.to_csv('result.csv',index=False)  # 0.75598 (public leaderboard score)
# __author__ = 'zhangwj'  -- next-module marker fused into this comment by
# the file concatenation.
def read_history(code='600036', begin_date=None, end_date=None):
    """Sync daily candles for one symbol from Xueqiu into the Equity collection.

    Parameters
    ----------
    code : str
        Bare A-share/HK/fund code, or an 8-char already-prefixed symbol.
        The bare form is kept as the Equity document key.
    begin_date, end_date : str or None
        Anything arrow.get accepts; default to 2014-01-01 and now.

    Returns
    -------
    list
        Currently always empty (kept for interface compatibility — the
        StockHistory construction was disabled upstream).
    """
    begin = arrow.get('2014-01-01') if begin_date is None else arrow.get(begin_date)
    end = arrow.now() if end_date is None else arrow.get(end_date)
    # Normalise to Xueqiu's exchange-prefixed symbol for the API call only.
    code2 = code
    if len(code) == 8:
        pass  # already prefixed, e.g. 'SH600036'
    elif code.startswith('60') or code.startswith('51'):
        code2 = 'SH' + code
    elif len(code) == 5:
        code2 = 'HK' + code
    elif len(code) == 6:
        code2 = 'SZ' + code
    # type=before => forward-adjusted (前复权) prices.
    url = '{}/stock/forchartk/stocklist.json?symbol={}&period=1day&type=before&begin={}&end={}'
    url = url.format(api_home, code2, begin.timestamp * 1000, end.timestamp * 1000)
    payload = {'access_token': xq_a_token}
    r = requests.get(url, params=payload, headers=headers)
    data_list = r.json().get('chartlist')
    result = []
    for data in data_list:
        # Xueqiu reports bar times like 'Mon Jan 05 00:00:00 +0800 2015'.
        bar_time = arrow.get(data.get('time'), 'ddd MMM DD HH:mm:ss Z YYYY')
        date = bar_time.format('YYYY-MM-DD')
        # Upsert one candle per (code, date).
        Equity.objects(code=code, date=date).update_one(percent=data.get('percent'),
                                                        open=data.get('open'),
                                                        high=data.get('high'),
                                                        low=data.get('low'),
                                                        close=data.get('close'),
                                                        volume=data.get('volume'),
                                                        upsert=True)
    # Cleanup: removed debug prints, dead commented-out StockHistory code,
    # and unused turnover/volume statistics the old version computed.
    return result
# Bucket per-query metric values into difficulty bands (queriesDiff maps a
# band label to its (query, x) pairs) and plot one grouped bar per run.
seriesDict = {key: dict() for key in queriesDiff}
for run in datas:
    data = datas[run]
    # NOTE(review): mean and stderr are computed but never used below —
    # looks like leftovers from an earlier version.
    mean = np.average([data[key] for key in queries])
    stderr = np.std([data[key] for key in queries]) / sqrt(len(queries))
    for (label, queriesByD) in queriesDiff.items():
        # Average the metric over the queries that fall in this band.
        seriesDict[label][run] = np.average(
            [data[key] for (key, x) in queriesByD])
print("dropping queries because of NaN values: " +
      " ".join(queriesWithNanValues))
# Rows = runs, columns = difficulty bands (fixed display order).
df1 = DataFrame(seriesDict,
                columns=("0%-5%", "5%-25%", '25%-50%', '50%-75%', '75%-95%',
                         '95%-100%'),
                index=args.runs)
df2 = df1
df2.index = [os.path.basename(label) for label in df1.index]
# Transpose so bands go on the x axis, grouped by run.
df3 = df2.transpose()
plt.figure()
df3.plot(kind='bar', label=args.metric,
         color=['0.0', '0.80', '0.4', '0.9', '0.70'])
leg = plt.legend(loc='best', fancybox=True)
leg.get_frame().set_alpha(0.5)
plt.tick_params(axis='both', which='major', labelsize=11)
plt.xticks(rotation=0)
plt.ylabel(args.metric, fontsize=20)
y_data = ori_data['Survived'] # specify parameters via map param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } num_round = 2 dtrain = xgb.DMatrix(np.array(x_data),np.array(y_data)) bst = xgb.train(param, dtrain, num_round) test_data = pd.read_csv('../resources/test_titanic.csv',header=0) x_data2 = test_data[['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']] x_data2.Sex = x_data2.Sex.map( {'female': 1, 'male': 0} ) x_data2.Age.fillna(value=random.randint(1, 100),inplace=True) x_data2.Fare.fillna(value=x_data2.Fare.mean(),inplace=True) x_data2.Embarked.fillna('C',inplace=True) x_data2.Embarked = x_data2.Embarked.map( {'C': 1, 'S': 2, 'Q':3, '':4 } ) x_data2['Pclass_Log'] = x_data2['Pclass'].map(lambda x: math.log(x)) x_data2['Sex_Log'] = x_data2['Sex'].map(lambda x: math.log(x+1)) x_data2['Age_Log'] = x_data2['Age'].map(lambda x: math.log(x)) x_data2['SibSp_Log'] = x_data2['SibSp'].map(lambda x: math.log(x+1)) x_data2['Parch_Log'] = x_data2['Parch'].map(lambda x: math.log(x+1)) x_data2['Fare_Log'] = x_data2['Fare'].map(lambda x: math.log(x+1)) x_data2['Embarked_Log'] = x_data2['Embarked'].map(lambda x: math.log(x)) dtest = xgb.DMatrix(np.array(x_data2)) preds = bst.predict(dtest) # print(preds) data = {'PassengerId':test_data['PassengerId'],'Survived':preds} result = DataFrame(data) # result.to_csv('result.csv',index=False)#0.75598
def main():
    """Read evaluation output for several runs, print per-run mean and
    standard error of ``args.metric``, and save a bar chart (stderr error
    bars plus paired-t significance markers) to ``args.out``.

    NOTE(review): relies on module-level names defined elsewhere in the
    file: ``parser``, ``pairedttest``, ``mpl``, ``plt``, ``np``, ``pd``,
    ``sqrt``, ``itertools``.
    """
    # Non-interactive backend so the figure can be saved headlessly.
    mpl.use("Agg")

    def read_ssv(fname):
        # Parse a whitespace-separated eval file. trec_eval rows are
        # reordered to (query-id, metric, value) to match galago_eval.
        lines = [line.split() for line in open(fname, 'r')]
        if args.format.lower() == 'galago_eval':
            return lines
        elif args.format.lower() == 'trec_eval':
            return [[line[1], line[0]] + line[2:] for line in lines]

    def readNumQueries(run):
        # Value of the numQueries_key ("num_q") row on the 'all' summary line.
        tsv = read_ssv(run)
        data = [int(row[2]) for row in tsv
                if row[0] == "all" and row[1] == numQueries_key]
        return data[0]

    def findQueriesWithNanValues(run):
        # Queries whose 'num_rel' is zero or NaN; these get dropped below.
        tsv = read_ssv(run)
        # print ("tsv,", tsv)
        queriesWithNan = {row[0] for row in tsv
                          if row[1] == 'num_rel' and
                          (float(row[2]) == 0.0 or
                           math.isnan(float(row[2])))}
        return queriesWithNan

    def fetchValues(run):
        # Map query-id -> metric value for args.metric (NaN entries skipped).
        tsv = read_ssv(run)
        data = {row[0]: float(row[2]) for row in tsv
                if row[1] == args.metric and
                not math.isnan(float(row[2]))}
        return data

    args = parser.parse_args()
    pairedt = pairedttest.pairedt(best=True, format=args.format,
                                  metric=args.metric, runs=args.runs)
    print("paired t")
    print(pairedt)
    print("=-----=")
    numQueries_key = "num_q"
    print("column.py metric="+args.metric+" out="+args.out)
    datas = {run: fetchValues(run) for run in args.runs}
    # deal with nans: drop the 'all' summary row plus any query that has a
    # zero/NaN num_rel in *any* run
    queriesWithNanValues = {'all'}.union(
        *[findQueriesWithNanValues(run) for run in args.runs])
    basedata = datas[args.runs[0]]
    queries = set(basedata.keys()).difference(queriesWithNanValues)
    # With args.c, normalize by the file's official query count; otherwise
    # by the number of queries actually kept.
    numQueries = readNumQueries(args.runs[0]) if args.c else len(queries)
    seriesDict = {'mean': dict(), 'stderr': dict()}
    for run in datas:
        data = datas[run]
        if sum(not key in data for key in queries) > 0:
            print("data for run "+run+" does not contain all queries "
                  + " ".join(queries))
        # Queries missing from this run count as 0.0 in mean and stddev.
        mean = np.sum([data.get(key, 0.0) for key in queries]) / numQueries
        stderr = np.std([data.get(key, 0.0) for key in queries]
                        + ([0.0] * (numQueries - len(queries)))) \
            / sqrt(numQueries)
        seriesDict['mean'][run] = mean
        seriesDict['stderr'][run] = stderr
    print("dropping queries because of NaN values: "
          + " ".join(queriesWithNanValues))
    print('\t'.join(['run', 'mean/stderr']))
    for run in datas:
        # if not run == args.runs[0]:
        print('\t'.join([run, str(seriesDict['mean'][run]),
                         str(seriesDict['stderr'][run])]))
    df1 = DataFrame(seriesDict, index=pd.Index(args.runs))
    if args.sort:
        df1.sort_values('mean', ascending=False, inplace=True)
    df2 = df1['mean']
    df2.index = [os.path.basename(label) for label in df1.index]
    df1.index = [os.path.basename(label) for label in df1.index]
    print('df2.index=', df2.index)
    # '**' marks runs NOT significantly different from the best (p > 0.05
    # or NaN p-value from the paired t-test).
    df2.text = ['**' if (math.isnan(pairedt[label][1])
                         or pairedt[label][1] > 0.05) else ''
                for label in df2.index]
    # Largest index among runs statistically tied with the best (0 if none).
    min_same_idx = max([i if (math.isnan(pairedt[label][1])
                              or pairedt[label][1] > 0.05) else 0
                        for i, label in enumerate(df2.index)])
    # One gray level per 3-character run-name prefix.
    cs = {k: v for k, v in zip(
        sorted(list(set([label[0:3] for label in df1.index]))),
        itertools.cycle(['0.1', '0.9', '0.5', '0.3', '0.7', '0.2', '0.8',
                         '0.4', '0.6']))}
    df1['color'] = [cs[label[0:3]] for label in df1.index]
    print(df1['color'])
    plt.tick_params(colors=df1.color)
    fig, ax = plt.subplots()
    df2.plot.bar(yerr=df1['stderr'], color=df1.color.values, ax=ax)
    for (p, i) in zip(ax.patches, range(100)):
        if args.sort:
            # When sorted, draw an arrow spanning the runs tied with best.
            if i == min_same_idx:
                frompoint = (p.get_x() + p.get_width(),
                             p.get_height() / 2.0)
                topoint = (0.0 - p.get_width() / 2.0, p.get_height() / 2.0)
                ax.annotate("", xy=topoint, xycoords='data',
                            xytext=frompoint, textcoords='data',
                            arrowprops=dict(arrowstyle="<|-|>",
                                            connectionstyle="arc3",
                                            ec='r'), )
        else:
            # Otherwise annotate each bar with its significance marker.
            ax.annotate(df2.text[i],
                        xy=(p.get_x() + p.get_width() / 2.0,
                            p.get_height() * 0.9),
                        ha='center', va='center', )
    ax.grid()
    plt.ylabel(args.metric, fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=20)
    plt.xticks(rotation=90)
    plt.savefig(args.out, bbox_inches='tight')
# Ordinal-encode the categorical columns of the "car evaluation" dataset,
# split it into a label frame and a feature frame, and set up the three
# decision-tree constructors to be compared.
# NOTE(review): `df` and `mapping_buy_maint` are defined earlier in the
# file (outside this excerpt), and the body of the trailing for-loop
# continues beyond it.
mapping_doors = {'2': 0, '3': 1, '4': 2, '5more': 3}
mapping_persons = {'2': 0, '4': 1, 'more': 2}
mapping_lug = {'small': 0, 'med': 1, 'big': 2}
mapping_safety = {'low': 0, 'med': 1, 'high': 2}
mapping_class = {'unacc': 1, 'acc': 2, 'good': 3, 'vgood': 4}
# map every categorical column to its integer code
df['maint'] = df['maint'].map(mapping_buy_maint)
df['buying'] = df['buying'].map(mapping_buy_maint)
df['doors'] = df['doors'].map(mapping_doors)
df['persons'] = df['persons'].map(mapping_persons)
df['lug_boot'] = df['lug_boot'].map(mapping_lug)
df['safety'] = df['safety'].map(mapping_safety)
df['class'] = df['class'].map(mapping_class).astype(int)
df = df.reset_index(drop=True)
# labels in their own frame; features are everything except 'class'
labels_df = DataFrame()
labels_df['cat'] = df['class'].copy()
features_df = df.copy()
features_df = features_df.drop('class', axis=1)
# the three tree-induction algorithms under comparison
c45 = C45Constructor(cf=0.95)
cart = CARTConstructor(max_depth=12, min_samples_leaf=2)
quest = QuestConstructor(default=1, max_nr_nodes=1, discrete_thresh=10,
                         alpha=0.99)
tree_constructors = [c45, cart, quest]
tree_confusion_matrices = {}
for tree_constructor in tree_constructors:
# Record this run's statistics, echo a tab-separated per-run table, and
# save a bar chart of the mean metric with stderr error bars.
# NOTE(review): `mean`, `stderr`, `run`, `seriesDict`, `datas` and `args`
# come from earlier in the file (outside this excerpt).
seriesDict['mean'][run] = mean
seriesDict['stderr'][run] = stderr

print("dropping queries because of NaN values: "
      + " ".join(queriesWithNanValues))
print('\t'.join(['run', 'mean/stderr']))
for run in datas:
    # the baseline run (first on the command line) is not echoed
    if run != args.runs[0]:
        row = [run,
               str(seriesDict['mean'][run]),
               str(seriesDict['stderr'][run])]
        print('\t'.join(row))

df1 = DataFrame(seriesDict, index=args.runs)
df2 = df1['mean']
# label bars with the basename of each run path
df2.index = [os.path.basename(label) for label in df1.index]

plt.figure()
# alternating grayscale bars (cycled by matplotlib if there are more runs)
bar_colors = ['0.0', '0.6', '0.4', '0.8', '0.4',
              '0.8', '0.4', '0.8', '0.4', '0.8']
df2.plot(kind='bar', yerr=df1['stderr'], color=bar_colors)
plt.ylabel(args.metric, fontsize=20)
plt.tick_params(axis='both', which='major', labelsize=20)
plt.xticks(rotation=90)
plt.savefig(args.out, bbox_inches='tight')
def plot_output(name, infile_path, model_names, filter):
    """
    Reads predictions from csv files and generates plots and output csv.
    Input csv files should be in the infile_path with following structure:

    ``infile_path`` /
        ../any_name/
            ../config.csv, test_.csv,train_.csv
        ../any_name2
            ../config.csv, test_.csv,train_.csv

    The function also exports the data used to generate graphs as csv files
    the following folder: ../graph_data
    these csv files can be used to reproduce outputs.

    Parameters
    ----------
    name : string
        name of the csv files to which data will be exported

    infile_path : string
        the folder which contains csv for configs and test and train

    model_names : list
        name of the sub-directories in ``infile_path`` to consider

    filter : callable
        a filter which will be applied in config files to filter which
        configs should be considered. For example, lambda x: x['method'] ==
        'full' will only consider outputs which used 'full' method
    """
    # NOTE(review): Python 2 code (`print` statement, dict.iteritems()).
    # PlotOutput, model_logging, the likelihood classes (CogLL, WarpLL, ...)
    # and check_dir_exists are defined elsewhere in the project.
    # One value-list per metric; filled below depending on the likelihood
    # type recorded in each model's config.
    graphs = {
        'SSE': {},
        'MSSE': {},
        'NLPD': {},
        'ER': {},
        'intensity': {},
    }
    # config-string -> number of models that contributed to it
    graph_n = {}
    for m in model_names:
        data_config = PlotOutput.read_config(
            infile_path + m + '/' + model_logging.CONFIG_FILE_NAME)
        if filter is None or filter(data_config):
            data_test = pandas.read_csv(
                infile_path + m + '/' + model_logging.PREDICTIONS_FILE_NAME)
            cols = data_test.columns
            # output dimensionality = number of 'true_Y*' columns
            dim = 0
            for element in cols:
                if element.startswith('true_Y'):
                    dim += 1
            data_train = pandas.read_csv(infile_path + m + '/' +
                                         model_logging.TRAINING_FILE_NAME)
            Y_mean = data_train['Y_0'].mean()
            Ypred = np.array(
                [data_test['predicted_Y_%d' % (d)] for d in range(dim)])
            Ytrue = np.array(
                [data_test['true_Y_%d' % (d)] for d in range(dim)])
            Yvar = np.array([
                data_test['predicted_variance_%d' % (d)] for d in range(dim)
            ])
            if not (PlotOutput.config_to_str(data_config) in graph_n.keys()):
                graph_n[PlotOutput.config_to_str(data_config)] = 0
            graph_n[PlotOutput.config_to_str(data_config)] += 1
            # per-dimension MSSE and NLPD for multi-output CogLL models
            if data_config['ll'] in [CogLL.__name__]:
                for i in range(Ytrue.shape[0]):
                    Y_mean = data_train['Y_' + str(i)].mean()
                    PlotOutput.add_to_list(
                        graphs['MSSE'],
                        PlotOutput.config_to_str(data_config) + '_' + str(i),
                        ((Ypred[i] - Ytrue[i])**2).mean() /
                        ((Y_mean - Ytrue[i])**2).mean())
                    NLPD = np.array(data_test['NLPD_' + str(i)])
                    PlotOutput.add_to_list(
                        graphs['NLPD'],
                        PlotOutput.config_to_str(data_config) + '_' + str(i),
                        NLPD)
            # standardized SSE + NLPD for (warped) Gaussian regression
            if data_config['ll'] in [
                    UnivariateGaussian.__name__, WarpLL.__name__
            ]:
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['SSE'], PlotOutput.config_to_str(data_config),
                    (Ypred[0] - Ytrue[0])**2 /
                    ((Y_mean - Ytrue[0])**2).mean())
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            # error rate + NLPD for binary classification (labels in {-1, 1})
            if data_config['ll'] in [LogisticLL.__name__]:
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['ER'], PlotOutput.config_to_str(data_config),
                    np.array([
                        (((Ypred[0] > 0.5) & (Ytrue[0] == -1)) |
                         ((Ypred[0] < 0.5) & (Ytrue[0] == 1))).mean()
                    ]))
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            # error rate + NLPD for multi-class (argmax over dimensions)
            if data_config['ll'] in [SoftmaxLL.__name__]:
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['ER'], PlotOutput.config_to_str(data_config),
                    np.array([(np.argmax(Ytrue, axis=0) != np.argmax(
                        Ypred, axis=0)).mean()]))
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            # intensity curves for the log-Gaussian Cox process; the X
            # rescaling (/365 + 1851.2026) presumably converts day offsets
            # to calendar years for this dataset — TODO confirm.
            if data_config['ll'] in [LogGaussianCox.__name__]:
                X0 = np.array([data_test['X_0']])
                PlotOutput.add_to_list(
                    graphs['intensity'],
                    PlotOutput.config_to_str(data_config),
                    np.array([
                        X0[0, :] / 365 + 1851.2026, Ypred[0, :], Yvar[0, :],
                        Ytrue[0, :]
                    ]).T)
    # One figure + exported csv per non-empty metric collection.
    for n, g in graphs.iteritems():
        if g:
            ion()
            for k in g.keys():
                if k in graph_n.keys():
                    print k, 'n: ', graph_n[k]
            if n in ['SSE', 'NLPD']:
                # box plot over per-query values
                g = DataFrame(
                    dict([(k, Series(v)) for k, v in g.iteritems()]))
                ax = g.plot(kind='box', title=n)
                check_dir_exists('../graph_data/')
                g.to_csv('../graph_data/' + name + '_' + n + '_data.csv',
                         index=False)
            if n in ['ER', 'MSSE']:
                # bar plot of means with std error bars
                g = DataFrame(
                    dict([(k, Series(v)) for k, v in g.iteritems()]))
                check_dir_exists('../graph_data/')
                g.to_csv('../graph_data/' + name + '_' + n + '_data.csv',
                         index=False)
                m = g.mean()
                errors = g.std()
                ax = m.plot(kind='bar', yerr=errors, title=n)
                patches, labels = ax.get_legend_handles_labels()
                ax.legend(patches, labels, loc='lower center')
            if n in ['intensity']:
                # export true intensity once, then one (x, mean, var) block
                # per model configuration
                X = g.values()[0][:, 0]
                true_data = DataFrame({'x': X, 'y': g.values()[0][:, 3]})
                true_data.to_csv('../graph_data/' + name + '_' + 'true_y' +
                                 '_data.csv', index=False)
                plt.figure()
                color = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
                c = 0
                check_dir_exists('../graph_data/')
                graph_data = DataFrame()
                for k, v in g.iteritems():
                    # plt.plot(X, v[:, 1], hold=True, color=color[c], label=k)
                    # plt.fill_between(X, v[:, 1] - 2 * np.sqrt(v[:, 2]), v[:, 1] + 2 * np.sqrt(v[:, 2]), alpha=0.2, facecolor=color[c])
                    graph_data = graph_data.append(
                        DataFrame({
                            'x': X,
                            'm': v[:, 1],
                            'v': v[:, 2],
                            'model_sp': [k] * X.shape[0]
                        }))
                    c += 1
                plt.legend(loc='upper center')
                graph_data.to_csv('../graph_data/' + name + '_' + n +
                                  '_data.csv', index=False)
            # NOTE(review): indentation of this call was ambiguous in the
            # collapsed source; it is placed at the per-metric level here.
            show(block=True)
# Plot Blair inside vs. outside temperatures as smoothed lines with a
# +/- 1 stddev error tube, averaged over overlapping time windows.
# NOTE(review): `mean_std`, `plt`, `matplotlib` and `timezone` come from
# earlier in the file (outside this excerpt).
blairInsideAll = genfromtxt('blair-inside.tsv', dtype=None,
                            names='time,sid,mid,value')
blairOutsideAll = genfromtxt('blair-outside.tsv', dtype=None,
                             names='time,sid,mid,value')


def filt(b):
    """Keep rows with a non-zero timestamp coming from sensor id 2."""
    # (was a lambda; PEP 8 E731 prefers a def for named callables)
    return b[np.logical_and(b['time'] != 0, b['sid'] == 2)]


blairOutside = filt(blairOutsideAll)
blairInside = filt(blairInsideAll)

begin = blairInside['time'][0]
end = blairInside['time'][-1]
duration = end - begin
steps = 300
# Overlapping averaging windows, each two steps wide.
slices = [(begin + duration * step / steps,
           begin + duration * (step + 2) / steps)
          for step in range(0, steps - 2)]
# Re-center each date in the middle of its averaging window.
# BUG FIX: the original computed `lo + (lo - hi) / 2`, which places the
# point *before* the window start; the midpoint is `lo + (hi - lo) / 2`.
slicedDates = [lo + (hi - lo) / 2 for (lo, hi) in slices]
idx = pandas.to_datetime(slicedDates, unit='s', utc=True)

df1 = DataFrame({'inside': mean_std(blairInside, slices)[0],
                 'outside': mean_std(blairOutside, slices)[0]},
                index=idx, columns=['inside', 'outside'])
df1.plot(kind='line')
plt.gca().xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%H:%M',
                                   tz=timezone("America/New_York")))


def fill_error_tube(b, color):
    """Shade mean +/- one standard deviation around a series."""
    (mean, error) = mean_std(b, slices)
    plt.fill_between(df1.index, mean - error, mean + error, color=color)


fill_error_tube(blairInside, [0.5, 0.5, 0.5, 0.5])
fill_error_tube(blairOutside, [0.5, 0.5, 0.5, 0.5])

plt.ylabel("Temperatur [Celsius]", fontsize=20)
plt.xlabel("Time [Hours]", fontsize=20)