Example #1
0
def get_reads_per_group(df, prefix, taxlevel='species', min_reads=10,
                        names=None):
    """
    Get the number of reads per taxonomic level and the number of unique
    taxa per read, writing three report files along the way.

    :param df: filtered blast dataframe (must have a 'qseqid' column and a
        `taxlevel` column; may optionally carry a 'size' column)
    :param prefix: prefix for the output file names
    :param taxlevel: taxonomic level (column name) to group by
    :param min_reads: minimum number of reads to retain a group
    :param names: optional list passed through to ``report_any``
    :return: the dataframe after ``report_any`` has been applied row-wise
    """
    # Fix for the shared mutable-default pitfall: the previous `names=[]`
    # default was one list object reused across every call.
    if names is None:
        names = []
    # Annotate/fill empty tax levels row by row (report_any is defined
    # elsewhere in this module).
    df = df.apply(report_any, args=(taxlevel, names,), axis=1)
    # Number of unique reads per taxonomic group.
    cou = df.groupby([taxlevel])['qseqid'].nunique()
    if 'size' in df.columns:
        # When reads carry a 'size' (abundance), also report the total.
        size = df.groupby([taxlevel])['size'].sum()
        size.name = 'Total'
        cou.name = 'Unique'
        cou = pd.concat((cou, size), axis=1)
    cou.to_csv('%s_number_of_reads_in_%s.tsv' % (prefix, taxlevel), sep='\t')
    # Number of unique taxa seen per read, most ambiguous reads first.
    re = pd.concat([df.groupby('qseqid')[taxlevel].nunique().rename(
        'No. unique taxa'), df.groupby('qseqid')[taxlevel].unique().rename(
        'Unique taxa')], axis=1).sort_values(by='No. unique taxa',
                                             ascending=False)
    re.to_csv('%s_Number_unique_%s_per_read.tsv' % (prefix, taxlevel),
              sep='\t')
    # List the groups with more than min_reads reads.
    # NOTE(review): when cou is a DataFrame (the 'size' branch above), a
    # boolean-DataFrame mask only NaNs-out cells and keeps every index
    # label, so this filter keeps all groups there — confirm intent.
    sps = cou[cou > min_reads].index.unique().to_series()
    sps.to_csv('%s_List_unique_%s.txt' % (prefix, taxlevel), header=False,
               index=False)
    return df
 def save_pattern_abnormal(self, path1, path2, path3):
     """
     Save the pattern-abnormal labels to a file.

     Reads two header-less CSV files, places them side by side
     (column-wise), and writes the combined table to ``path3`` as a
     space-separated file with no header and no index.
     """
     left = pd.read_csv(path1, header=None)
     right = pd.read_csv(path2, header=None)
     combined = pd.concat([left, right], axis=1)
     combined.to_csv(path3, sep=' ', index=False, header=False)
            # Collect the per-ticker price/growth metrics computed above
            # (not visible in this chunk) into one record.
            dic_tmp['count'] = count
            dic_tmp['price'] = meam_tmp
            dic_tmp['month_six_rs'] = month_six_rs
            dic_tmp['year_rs'] = year_rs
            dic_tmp['year'] = year
            dic_tmp['grow_6'] = grow_6
            dic_tmp['grow_3'] = grow_3
            dic_tmp['grow_3_1'] = grow_3_1
            dic_tmp['grow_3_2'] = grow_3_2
            dic_tmp['grow_3_3'] = grow_3_3
            dic_tmp['volume_start'] = vol_start
            dic_tmp['volume_end'] = vol_end
            dic_tmp['vol_radio'] = vol_radio
            li.append(dic_tmp)
        # Progress counter for the enclosing loop (loop head not visible here).
        nu_nu = nu_nu + 1
    # One DataFrame row per record accumulated in `li`.
    datatable = pd.DataFrame(li)
    return datatable


# Fetch per-code metrics (get_laohu_price is defined elsewhere in this file)
# and outer-join them onto the code table so unmatched rows are kept.
pan = get_laohu_price(url, li_code)
# NOTE(review): `re` rebinds the stdlib regex-module name used elsewhere in
# this file — consider renaming.
re = pd.merge(code, pan, how='outer', on='code')
re = re.drop_duplicates()
# GBK encoding, presumably for Chinese-locale spreadsheet tools — confirm.
re.to_csv(date + '_Laohu_us_grow_rs.csv', encoding='gbk', index=False)

# cn_us=get_laohu_code(url_us, [i for i in range(14)])
# cn_us.to_csv(date+'us_us_code.csv', index=False, encoding ='gbk')

# frames=[cn_nsdq,cn_ny,cn_us]
# sum=pd.concat(frames,ignore_index=True)
# sum.to_csv(date+'us_all_code.csv', index=False, encoding ='gbk')
Example #4
0
            # Collect the per-ticker weekly growth/volume metrics computed
            # above (not visible in this chunk) into one record.
            dic_tmp['price_week_3'] = price_week_3
            dic_tmp['week_6_rs'] = week_6_rs
            dic_tmp['week_3_rs'] = week_3_rs
            dic_tmp['year'] = year
            dic_tmp['grow_self_6'] = grow_self_6
            dic_tmp['grow_self_3'] = grow_self_3
            dic_tmp['grow_radio_6'] = grow_radio_6
            dic_tmp['grow_radio_3'] = grow_radio_3
            dic_tmp['volume_6'] = vol_6
            dic_tmp['volume_3'] = vol_3
            dic_tmp['vol_radio'] = vol_radio
            li.append(dic_tmp)
        # Progress counter for the enclosing loop (loop head not visible here).
        nu_nu = nu_nu + 1
    # One DataFrame row per record accumulated in `li`.
    datatable = pd.DataFrame(li)
    return datatable


# Fetch per-code metrics (get_laohu_price is defined elsewhere in this file)
# and outer-join them onto the code table so unmatched rows are kept.
pan = get_laohu_price(url, li_code)
# NOTE(review): `re` rebinds the stdlib regex-module name used elsewhere in
# this file — consider renaming.
re = pd.merge(code, pan, how='outer', on='code')
re = re.drop_duplicates()
# GBK encoding, presumably for Chinese-locale spreadsheet tools — confirm.
re.to_csv(date + '_Laohu_us_week_grow_radio_rs_2.csv',
          encoding='gbk',
          index=False)

# cn_us=get_laohu_code(url_us, [i for i in range(14)])
# cn_us.to_csv(date+'us_us_code.csv', index=False, encoding ='gbk')

# frames=[cn_nsdq,cn_ny,cn_us]
# sum=pd.concat(frames,ignore_index=True)
# sum.to_csv(date+'us_all_code.csv', index=False, encoding ='gbk')
Example #5
0
            # Collect the per-ticker summary statistics computed above
            # (not visible in this chunk) into one record.
            dic_tmp['count'] = count
            dic_tmp['mean'] = meam_tmp
            dic_tmp['std'] = std_tmp
            # NOTE(review): `max`/`min` are presumably locals computed above
            # this chunk that shadow the builtins — confirm upstream.
            dic_tmp['max'] = max
            dic_tmp['min'] = min
            dic_tmp['start'] = startprice
            dic_tmp['end'] = endd
            dic_tmp['year'] = year
            dic_tmp['price_start'] = price_start
            dic_tmp['price_middle'] = price_middle
            dic_tmp['price_end'] = price_end
            dic_tmp['volume_start'] = vol_start
            dic_tmp['volume_end'] = vol_end
            li.append(dic_tmp)
        # Progress counter for the enclosing loop (loop head not visible here).
        nu_nu = nu_nu + 1
    # One DataFrame row per record accumulated in `li`.
    datatable = pd.DataFrame(li)
    return datatable


# Fetch per-code metrics (get_laohu_price is defined elsewhere in this file)
# and outer-join them onto the code table so unmatched rows are kept.
pan = get_laohu_price(url, li_code)
# NOTE(review): `re` rebinds the stdlib regex-module name used elsewhere in
# this file — consider renaming.
re = pd.merge(code, pan, how='outer', on='code')
re = re.drop_duplicates()
# GBK encoding, presumably for Chinese-locale spreadsheet tools — confirm.
re.to_csv(date + '_Laohu_us_price.csv', encoding='gbk', index=False)

# cn_us=get_laohu_code(url_us, [i for i in range(14)])
# cn_us.to_csv(date+'us_us_code.csv', index=False, encoding ='gbk')

# frames=[cn_nsdq,cn_ny,cn_us]
# sum=pd.concat(frames,ignore_index=True)
# sum.to_csv(date+'us_all_code.csv', index=False, encoding ='gbk')
Example #6
0
    # Number of yearly values expected for this ticker.
    coun=len(code_year)
    # Grab the HTML fragment between the Chinese labels for "return on
    # equity" and "return on capital employed" (ROE values sit between
    # them, judging by the roe20xx index used further below).
    s = r'股東權益回報率(.*?)資本運用回報率'
    pat = re.compile(s)
    codd = pat.findall(html)
    if  codd:
        # NOTE(review): findall never returns None, so this inner check is
        # redundant with the truthiness test above.
        if not codd is None:
            # Pull the table-cell values out of the matched fragment.
            s = r'">(.*?)</td>'
            pat = re.compile(s)
            code_value = pat.findall(codd[0])
            print(code_value[:coun])

    # Pad each per-year row with a '0' placeholder for this ticker.
    for i in li:
        i.append('0')

    # Overwrite the placeholder with the scraped value whose year matches.
    for i in range(coun):
        for j in range(len(hee)):
            if code_year[i][:4]==hee[j]:
                li[j][n]=code_value[i]

    # Column index for the next ticker (n is initialised above this chunk).
    n=n+1

# Assemble the ROE table: one column per ticker, one row per year.
pdd=pd.DataFrame(li,   columns=li_code, index=[ 'roe2013','roe2014','roe2015','roe2016','roe2017','roe2018'])
# Transpose so each ticker becomes a row, then tag rows with their code.
pan=pdd.T
pan['code'] = li_code
# NOTE(review): this rebinds `re`, shadowing the regex module used above.
re=pd.merge(code,pan,how='outer',on='code')
re.to_csv(date+'_aastocks_uk_roe.csv', encoding = 'gbk',index=False)




Example #7
0
    # Drop duplicated columns, keeping the first occurrence of each name.
    t = zong.loc[:,~zong.columns.duplicated()]
    # t=zong.drop(0,axis=0)
    # Today's date as 'YYYY-MM-DD'.
    # NOTE(review): `date` is assigned but not used within this visible
    # chunk — possibly intended as a module-level name; confirm.
    date=time.strftime('%Y-%m-%d',time.localtime(time.time()))
    # Promote the index (stock codes, presumably) to a regular 'code' column.
    t.index.rename('code', inplace=True)
    t.reset_index(inplace=True)
    # t.code=t.code.str.replace('sz', '').replace('sh', '')
    return t

# Get the list of stock codes
code= pd.read_excel('Data20190311.xls',encoding='gbk')
# code = list(set(code))
listcode=code.code.tolist()
Code_List=[]
# Normalise raw numeric codes to exchange-prefixed strings:
#   6-digit codes starting with '6' -> Shanghai ('sh...'),
#   shorter codes, zero-padded to 6 -> Shenzhen ('sz...'),
#   other 6-digit codes             -> Shenzhen ('sz...').
for item in listcode:
    if len(str(item)) == 6 and str(item)[0] == '6':
        Code_List.append('sh'+str(item))
    if len(str(item)) < 6:
        Code_List.append('sz'+(6-len(str(item)))*'0'+str(item))
    if len(str(item)) == 6 and str(item)[0] != '6':
        Code_List.append('sz'+str(item))
# NOTE(review): items longer than 6 digits match no branch and are silently
# dropped, which would misalign this Series with the original rows — confirm
# the input never contains them.
code.code=pd.Series(Code_List)
# code = code[:10]
t=get_income(url,Code_List)
re=pd.merge(code, t, how='outer',on='code')
re.to_csv(date+'_year_code_eastmoney.csv',encoding = 'gbk',index=False)
# print(t)




            # Mean close over the middle and final thirds of the window.
            price_middle=jo[int(count/3):int((count*2)/3)]['close'].mean()
            price_end=jo[int((count*2)/3):]['close'].mean()
            # Mean volume over the first and second halves, and the relative
            # change between them ("radio" is presumably a typo for "ratio").
            vol_start=jo[:int(count/2)]['volume'].mean()
            vol_end=jo[int(count/2):]['volume'].mean()
            vol_radio=(vol_end-vol_start)/vol_start
            # Collect the per-ticker metrics into one record.
            dic_tmp['count']=count
            dic_tmp['price']=meam_tmp
            dic_tmp['month_six_rs']=month_six_rs
            dic_tmp['year_rs']=year_rs
            dic_tmp['year']=year
            dic_tmp['volume_start']=vol_start
            dic_tmp['volume_end']=vol_end
            dic_tmp['vol_radio']=vol_radio
            li.append(dic_tmp)
        # Progress counter for the enclosing loop (loop head not visible here).
        nu_nu=nu_nu+1
    # One DataFrame row per record accumulated in `li`.
    datatable=pd.DataFrame(li)
    return datatable

# Fetch per-code metrics (get_laohu_price is defined elsewhere in this file)
# and outer-join them onto the code table so unmatched rows are kept.
pan=get_laohu_price(url, li_code)
# NOTE(review): `re` rebinds the stdlib regex-module name used elsewhere in
# this file — consider renaming.
re=pd.merge(code, pan, how='outer',on='code')
re=re.drop_duplicates()
# GBK encoding, presumably for Chinese-locale spreadsheet tools — confirm.
re.to_csv(date+'_Laohu_us_strength.csv', encoding = 'gbk',index=False)


# cn_us=get_laohu_code(url_us, [i for i in range(14)])
# cn_us.to_csv(date+'us_us_code.csv', index=False, encoding ='gbk')

# frames=[cn_nsdq,cn_ny,cn_us]
# sum=pd.concat(frames,ignore_index=True)
# sum.to_csv(date+'us_all_code.csv', index=False, encoding ='gbk')