Example 1
def run():

    args = BPRMF_Args(parse_args())

    # Build a DataLoader over the training set
    data = DATA(args.data_path, args.dataset_name)
    train_set, train_U2I, test_U2I, n_users, n_items = data.load()
    train_dl = get_loader(train_set, train_U2I, n_items, args.batch_size,
                          args.cores)

    # Define the model
    model = BPRMF(n_users, n_items, args)
    model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Define the training session
    sess = Session(model)

    for epoch in range(args.num_epochs):
        loss, mf_loss, emb_loss = sess.train(train_dl, optimizer)
        print("epoch: {:d}, loss = [{:.6f} == {:.6f} + {:.6f}]".format(
            epoch, loss, mf_loss, emb_loss))
        perf_info = evaluate(model, n_users, n_items, train_U2I, test_U2I,
                             args)
        print("precision: [{:.6f}] recall: [{:.6f}] ndcg: [{:.6f}]".format(
            perf_info[0], perf_info[1], perf_info[2]))
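The BPRMF model and Session.train are not shown in this listing. As a rough sketch (not the repository's actual implementation) of what a BPR training step conventionally computes, matching the loss == mf_loss + emb_loss split printed above, assuming hypothetical (batch, dim) embedding lookups user_emb, pos_emb, neg_emb and an L2 coefficient reg:

import torch
import torch.nn.functional as F

def bpr_step(user_emb, pos_emb, neg_emb, reg):
    """One BPR step: pairwise ranking loss plus an L2 embedding penalty."""
    pos_scores = (user_emb * pos_emb).sum(dim=1)   # scores of observed items
    neg_scores = (user_emb * neg_emb).sum(dim=1)   # scores of sampled negatives
    mf_loss = -F.logsigmoid(pos_scores - neg_scores).mean()
    emb_loss = reg * (user_emb.norm(2).pow(2) +
                      pos_emb.norm(2).pow(2) +
                      neg_emb.norm(2).pow(2)) / user_emb.shape[0]
    return mf_loss + emb_loss, mf_loss, emb_loss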
Example 2
def run():

    args = NGCF_Args(parse_args())

    # Build a DataLoader over the training set
    data = DATA(args.data_path, args.dataset_name)
    (train_set, train_U2I, test_U2I, edge_indices, edge_weight,
     n_users, n_items) = data.load()
    train_dl = get_loader(train_set, train_U2I, n_items, args.batch_size,
                          args.cores)

    # Build the normalized Laplacian matrix
    laplace_graph = Graph(edge_indices, edge_weight)
    laplace_graph.add_self_loop()
    laplace_graph.norm()
    norm_adj = laplace_graph.mat.cuda()

    # Define the model
    model = NGCF(n_users, n_items, norm_adj, args)
    model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Define the training session
    sess = Session(model)

    for epoch in range(args.num_epochs):
        loss, mf_loss, emb_loss = sess.train(train_dl, optimizer)
        print("epoch: {:d}, loss = [{:.6f} == {:.6f} + {:.6f}]".format(
            epoch, loss, mf_loss, emb_loss))
        perf_info = evaluate(model, n_users, n_items, train_U2I, test_U2I,
                             args)
        print("precision: [{:.6f}] recall: [{:.6f}] ndcg: [{:.6f}]".format(
            perf_info[0], perf_info[1], perf_info[2]))
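Graph.add_self_loop and Graph.norm are likewise not shown. NGCF conventionally propagates over the symmetrically normalized adjacency D^-1/2 (A + I) D^-1/2; the following is a minimal sketch of that construction (an assumption about what Graph does, with a hypothetical build_norm_adj helper), where edge_indices and edge_weight are array-likes describing a sparse COO matrix:

import numpy as np
import scipy.sparse as sp
import torch

def build_norm_adj(edge_indices, edge_weight, n_nodes):
    # Assemble the sparse adjacency A from COO indices/weights.
    rows, cols = edge_indices
    adj = sp.coo_matrix((edge_weight, (rows, cols)), shape=(n_nodes, n_nodes))
    adj = adj + sp.eye(n_nodes)        # add self-loops: A + I
    # Symmetric normalization: D^-1/2 (A + I) D^-1/2.
    deg = np.asarray(adj.sum(axis=1)).flatten()
    with np.errstate(divide='ignore'):
        d_inv_sqrt = np.power(deg, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
    norm = (sp.diags(d_inv_sqrt) @ adj @ sp.diags(d_inv_sqrt)).tocoo()
    idx = torch.tensor(np.vstack([norm.row, norm.col]), dtype=torch.long)
    val = torch.tensor(norm.data, dtype=torch.float32)
    return torch.sparse_coo_tensor(idx, val, norm.shape)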
Example 3
    def correlation(self, indicators=None):
        if not indicators:
            indicators = self.indicators

        comb = DATA.by_indicators(indicators)

        def _spearman(df):
            df = df.dropna()
            if df.shape[0] > 10:  # TODO: threshold to choose
                return cal_corr(df, 'spearman', winsorize=False)

        def _pearson(df):
            df = df.dropna()
            if df.shape[0] > 10:  # TODO: min_samples
                return cal_corr(df, 'pearson', winsorize=True)

        corrs = comb.groupby('t').apply(_spearman)
        corrp = comb.groupby('t').apply(_pearson)

        corrsAvg = corrs.groupby(level=1).mean().reindex(index=indicators, columns=indicators)
        corrpAvg = corrp.groupby(level=1).mean().reindex(index=indicators, columns=indicators)

        # Combine the two averaged matrices: Pearson below the diagonal,
        # Spearman above it, NaN on the diagonal.
        corr1 = np.tril(corrpAvg.values, k=-1)
        corr2 = np.triu(corrsAvg.values, k=1)

        corr = pd.DataFrame(corr1 + corr2, index=corrpAvg.index, columns=corrpAvg.columns)
        np.fill_diagonal(corr.values, np.nan)
        corr.to_csv(os.path.join(self.path, 'corr.csv'))
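The resulting matrix packs two correlation measures into one table: average Pearson correlations below the diagonal and average Spearman correlations above it. A tiny self-contained check of that tril/triu composition, with pandas' built-in .corr() standing in for the repository's cal_corr:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(100, 3)), columns=list('abc'))

pearson = df.corr('pearson')
spearman = df.corr('spearman')

# Pearson below the diagonal, Spearman above, NaN on the diagonal.
corr = pd.DataFrame(np.tril(pearson.values, k=-1) + np.triu(spearman.values, k=1),
                    index=pearson.index, columns=pearson.columns)
np.fill_diagonal(corr.values, np.nan)
print(corr)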
Example 4
    def fm(self, wsz=None):
        comb = DATA.by_indicators(self.indicators + ['eretM'])
        data = []
        ps = []
        for indicator in self.indicators:
            subdf = comb[[indicator, 'eretM']]
            subdf = subdf.dropna()
            subdf.columns = ['y', 'x']
            # The independent variable is winsorized at a given level on a
            # monthly basis (see page 141).
            subdf['x'] = subdf.groupby('t')['x'].apply(
                lambda s: winsorize(s, limits=WINSORIZE_LIMITS))
            subdf = subdf.reset_index()
            formula = 'y ~ x'
            r, adj_r2, n, p = famaMacBeth(formula, 't', subdf, lags=5)
            # TODO: why is the intercept t-value so large?
            # TODO: why do some FM regressions not have an adj_r2?
            data.append([r.loc['x', 'coef'], r.loc['x', 'tvalue'],
                         r.loc['Intercept', 'coef'], r.loc['Intercept', 'tvalue'],
                         adj_r2, n])
            ps.append(p['x'])
            print(indicator)

        result = pd.DataFrame(data, index=self.indicators,
                              columns=['slope', 't', 'Intercept', 'Intercept_t', 'adj_r2', 'n']).T
        result.to_csv(os.path.join(self.path, 'fama macbeth regression analysis.csv'))

        parameters = pd.concat(ps, axis=1, keys=self.indicators)
        parameters.to_csv(os.path.join(self.path, 'fama macbeth regression parameters in first stage.csv'))
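famaMacBeth itself is not included in this listing. The standard two-stage procedure it names runs one cross-sectional OLS per period and then tests the time series of slopes against zero, typically with Newey-West (HAC) standard errors, which would match lags=5 above. A minimal sketch using statsmodels (a hypothetical fama_macbeth helper; the repository's version also returns adj_r2, n, and the first-stage parameters, omitted here):

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

def fama_macbeth(formula, time_col, df, lags=5):
    # First stage: one cross-sectional OLS per period.
    params = df.groupby(time_col).apply(
        lambda g: smf.ols(formula, data=g).fit().params)
    # Second stage: regress each coefficient's time series on a constant
    # with Newey-West (HAC) standard errors.
    out = {}
    for name in params.columns:
        ts = sm.OLS(params[name].values, [1.0] * len(params)).fit(
            cov_type='HAC', cov_kwds={'maxlags': lags})
        out[name] = {'coef': ts.params[0], 'tvalue': ts.tvalues[0]}
    return pd.DataFrame(out).T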
Example 5
    def _get_port_data(self, indicator):
        groupid = DATA.by_indicators([indicator])
        groupid['g'] = groupid.groupby('t', group_keys=False).apply(
            lambda df: pd.qcut(df[indicator], self.q,
                               labels=[indicator + str(i) for i in range(1, self.q + 1)])
        )
        return groupid
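Each month's cross section is cut into self.q equal-count bins, labeled indicator1 through indicator{q}. A quick illustration of the labeling, with a hypothetical indicator named 'size' and q=4:

import pandas as pd

s = pd.Series([0.1, 0.4, 0.2, 0.9, 0.6, 0.3, 0.8, 0.5])
print(pd.qcut(s, 4, labels=['size1', 'size2', 'size3', 'size4']))
# two observations land in each of the four equal-count bins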
Example 6
    def portfolio_analysis(self):
        '''
        table 8.4

        :return:
        '''
        # TODO: add a parameter to declare which risk models will be used [ff3, capm, ff5]

        all_indicators = list(set(self.indicators + ['capM', 'eretM']))
        comb = DATA.by_indicators(all_indicators)

        result_eavg = []
        result_wavg = []
        for indicator in self.indicators:
            gcol = 'g_%s' % indicator
            # comb[gcol] = comb.groupby('t').apply(
            #     lambda df: grouping(df[indicator].reset_index(level='t'), self.q, labels=self.groupnames))
            comb[gcol] = comb.groupby('t', group_keys=False).apply(
                lambda df: assign_port_id(df[indicator], self.q, self.groupnames))
            # TODO: add an alternative sorting method, that is, updating yearly,
            # as on page 9 of Chen et al., "On the Predictability of Chinese Stock Returns."

            panel_stk_eavg, panel_stk_wavg = self._get_panel_stk_avg(comb, indicator, gcol)
            for panel_stk in [panel_stk_eavg, panel_stk_wavg]:
                panel = panel_stk.unstack(level=[gcol])
                panel.columns = panel.columns.astype(str)
                # high-minus-low spread portfolio and the average across groups
                panel['_'.join([self.groupnames[-1], self.groupnames[0]])] = \
                    panel[self.groupnames[-1]] - panel[self.groupnames[0]]
                panel['avg'] = panel.mean(axis=1)
                # TODO: use the risk models declared above

                a_data = comb.groupby(['t', gcol])[indicator].mean()
                a_data = a_data.unstack()
                a_data.columns = a_data.columns.astype(str)
                a_data.index = a_data.index.astype(str)
                a_data['_'.join([self.groupnames[-1], self.groupnames[0]])] = \
                    a_data[self.groupnames[-1]] - a_data[self.groupnames[0]]
                a_data['avg'] = a_data.mean(axis=1)
                a = a_data.mean()
                a.name = 'avg'
                a = a.to_frame().T

                riskAdjusted = risk_adjust(panel)
                # TODO: something must be wrong with size or portfolio_analysis
                if panel_stk is panel_stk_eavg:
                    result_eavg.append(pd.concat([a, riskAdjusted], axis=0))
                else:
                    result_wavg.append(pd.concat([a, riskAdjusted], axis=0))
        table_e = pd.concat(result_eavg, axis=0, keys=self.indicators)
        table_w = pd.concat(result_wavg, axis=0, keys=self.indicators)
        # reorder the columns
        initialOrder = table_e.columns.tolist()
        newOrder = self.groupnames + [col for col in initialOrder if col not in self.groupnames]
        table_e = table_e.reindex(columns=newOrder)
        table_w = table_w.reindex(columns=newOrder)

        table_e.to_csv(os.path.join(self.path, 'univariate portfolio analysis-equal weighted.csv'))
        table_w.to_csv(os.path.join(self.path, 'univariate portfolio analysis-value weighted.csv'))
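assign_port_id is not shown here. Judging from the pd.qcut calls it replaces in the other examples, it assigns each stock to one of q quantile portfolios; one plausible stand-in (an assumption, not the repository's code) ranks first so that tied indicator values cannot break the quantile edges:

import pandas as pd

def assign_port_id(series, q, labels):
    # Ranking first makes the bin edges unique even when the raw
    # indicator has many tied values.
    return pd.qcut(series.rank(method='first'), q, labels=labels)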
Example 7
    def portfolio_characteristics(self, sortedIndicator, otherIndicators):
        '''
        as in table 12.3, panel A

        :param sortedIndicator:
        :param otherIndicators:
        :return:
        '''
        groupid = self._get_port_data(sortedIndicator)
        comb = DATA.by_indicators(otherIndicators)
        comb = pd.concat([groupid, comb], axis=1)
        characteristics_avg = comb.groupby(['t', 'g']).mean().groupby('g').mean()
        characteristics_avg.to_csv(os.path.join(self.path, 'portfolio characteristics.csv'))
Example 8
    def get_percent_ratio(self):
        '''Fig 9.1, page 152'''
        def _get_ratio(s):
            s = s.dropna()
            total = s.shape[0]
            ratios = [0.01, 0.05, 0.10, 0.25]
            num = [int(r * total) for r in ratios]
            # share of total market value held by the largest 1%/5%/10%/25% of stocks
            return pd.Series([s.nlargest(n).sum() / s.sum() for n in num],
                             index=ratios)

        df = DATA.by_indicators('mktCap')
        d = df.groupby('t')['mktCap'].apply(_get_ratio)
        fig = d.unstack().plot().get_figure()
        fig.savefig(os.path.join(self.path, 'percent of market value.png'))
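For a single cross section, _get_ratio reports the share of total market value held by the largest 1%, 5%, 10%, and 25% of stocks. A worked miniature with ten hypothetical market caps:

import pandas as pd

caps = pd.Series([100, 50, 20, 10, 5, 5, 5, 2, 2, 1])
top10pct = caps.nlargest(int(0.10 * len(caps))).sum() / caps.sum()
print(top10pct)  # 0.5: the largest stock alone holds half of the total value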
Example 9
    def _fm(self, ll_indeVars):
        '''
        :param ll_indeVars: list of lists; each inner list contains all the
            independent variables used to construct one regression equation

        :return:
        '''
        indeVars = list(set(var for l_indeVars in ll_indeVars for var in l_indeVars))
        indicators = indeVars + ['eretM']
        comb = DATA.by_indicators(indicators)
        # The independent variables are winsorized at a given level on a
        # monthly basis (see page 170).
        comb[indeVars] = comb.groupby('t')[indeVars].apply(
            lambda x: winsorize(x, limits=WINSORIZE_LIMITS, axis=0))
        comb = comb.reset_index()
        stks = []
        for l_indeVars in ll_indeVars:
            # Replace the old names with new ones, since patsy does not
            # support names that start with a number.
            newname = ['name' + str(i) for i in range(1, len(l_indeVars) + 1)]
            df = comb[l_indeVars + ['t', 'eretM']].dropna()
            df.columns = newname + ['t', 'eretM']
            formula = 'eretM ~ ' + ' + '.join(newname)
            # TODO: lags?
            r, adj_r2, n, p = famaMacBeth(formula, 't', df, lags=5)
            r = r.rename(index=dict(zip(newname, l_indeVars)))
            # save the first-stage regression parameters
            p = p.rename(columns=dict(zip(newname, l_indeVars)))
            p.to_csv(os.path.join(self.path, 'first stage parameters ' + '_'.join(l_indeVars) + '.csv'))
            stk = r[['coef', 'tvalue']].stack()
            stk.index = stk.index.map('{0[0]} {0[1]}'.format)
            stk['adj_r2'] = adj_r2
            stk['n'] = n
            stks.append(stk)

        table = pd.concat(stks, axis=1, keys=range(1, len(ll_indeVars) + 1))

        # 'eretM' is the dependent variable, so only the independent
        # variables get coef/tvalue rows.
        newIndex = [var + ' ' + suffix for var in indeVars for suffix in ['coef', 'tvalue']] + \
                   ['Intercept coef', 'Intercept tvalue', 'adj_r2', 'n']

        table = table.reindex(index=newIndex)

        table.to_csv(os.path.join(self.path, 'fama macbeth regression analysis.csv'))
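Both _fm and fm above winsorize the regressors month by month with scipy's winsorize. A quick demonstration of what the limits argument does, assuming WINSORIZE_LIMITS is a pair of tail fractions such as (0.01, 0.01):

import numpy as np
from scipy.stats.mstats import winsorize

x = np.arange(1, 101)                  # 1..100
w = winsorize(x, limits=(0.01, 0.01))  # clip 1% of observations in each tail
print(w.min(), w.max())                # 2 99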
Example 10
    def _get_dependent_data(self, indicators):
        '''
        :param indicators: list with two elements; the first is the
            controlling variable
        :return:
        '''
        # sometimes self.indicators and ['capM', 'eretM'] may share some elements
        comb = DATA.by_indicators(indicators + ['capM', 'eretM'])
        comb = comb.dropna()
        comb['g1'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[indicators[0]], self.q,
                                      [indicators[0] + str(i) for i in range(1, self.q + 1)]))

        # the second sort is performed within each g1 bucket (dependent sort)
        comb['g2'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
            lambda df: assign_port_id(df[indicators[1]], self.q,
                                      [indicators[1] + str(i) for i in range(1, self.q + 1)]))

        return comb
Example 11
    def _one_indicator(self, indicator):
        ns = range(0, 12)
        all_indicators = list(set([indicator] + ['capM', 'eretM']))
        comb = DATA.get_by_indicators(all_indicators)
        comb = comb.dropna()
        comb['g'] = comb.groupby('t', group_keys=False).apply(
            lambda df: pd.qcut(df[indicator], self.q,
                               labels=[indicator + str(i) for i in range(1, self.q + 1)])
        )

        def _one_indicator_one_weight_type(group_ts, indicator):
            def _big_minus_small(s, ind):
                time = s.index.get_level_values('t')[0]
                return s[(time, ind + str(self.q))] - s[(time, ind + '1')]

            spread_data = group_ts.groupby('t').apply(
                lambda series: _big_minus_small(series, indicator))
            s = risk_adjust(spread_data)
            return s

        eret = comb['eretM'].unstack()  # 'eretM' is the return column loaded above

        s_es = []
        s_ws = []
        eret_names = []
        for n in ns:
            eret_name = 'eret_ahead%s' % (n + 1)
            comb[eret_name] = eret.shift(-n).stack()

            group_eavg_ts = comb.groupby(['t', 'g'])[eret_name].mean()
            # value-weighted with market cap ('capM' is the cap column loaded above)
            group_wavg_ts = comb.groupby(['t', 'g']).apply(
                lambda df: np.average(df[eret_name], weights=df['capM']))

            s_e = _one_indicator_one_weight_type(group_eavg_ts, indicator)
            s_w = _one_indicator_one_weight_type(group_wavg_ts, indicator)
            s_es.append(s_e)
            s_ws.append(s_w)
            eret_names.append(eret_name)
        eq_table = pd.concat(s_es, axis=1, keys=eret_names)
        vw_table = pd.concat(s_ws, axis=1, keys=eret_names)
        return eq_table, vw_table
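The eret.shift(-n).stack() trick aligns the return realized n periods ahead with the current month's portfolio assignment. A small demonstration on a (t, stock) MultiIndex:

import pandas as pd

idx = pd.MultiIndex.from_product([[1, 2, 3], ['A', 'B']], names=['t', 'stock'])
eret = pd.Series([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], index=idx)

wide = eret.unstack()            # rows: t, columns: stock
ahead1 = wide.shift(-1).stack()  # the value stored at t is the return at t+1
print(ahead1)                    # t=3 drops out: no t+1 return exists for it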
Example 12
    def _get_independent_data(self):
        # TODO: add the method of ratios such as [0.3, 0.7]
        # sometimes self.indicators and ['capM', 'eretM'] may share some elements
        comb = DATA.by_indicators([self.indicator1, self.indicator2, 'capM', 'eretM'])
        comb = comb.dropna()
        # the two sorts are performed independently on the full cross section
        comb['g1'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[self.indicator1], self.q,
                                      [self.indicator1 + str(i) for i in range(1, self.q + 1)]))

        comb['g2'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[self.indicator2], self.q,
                                      [self.indicator2 + str(i) for i in range(1, self.q + 1)]))

        # alternative pd.qcut-based grouping:
        # comb['g1'] = comb.groupby('t', group_keys=False).apply(
        #     lambda df: pd.qcut(df[self.indicator1], self.q,
        #                        labels=[self.indicator1 + str(i) for i in range(1, self.q + 1)]))
        # comb['g2'] = comb.groupby('t', group_keys=False).apply(
        #     lambda df: pd.qcut(df[self.indicator2], self.q,
        #                        labels=[self.indicator2 + str(i) for i in range(1, self.q + 1)]))

        return comb
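Examples 10 and 12 differ only in the groupby key of the second sort: grouping by 't' alone bins the second indicator over the whole cross section (independent sort), while grouping by ['t', 'g1'] re-bins it within each g1 bucket (dependent sort). A toy contrast, with pd.qcut standing in for assign_port_id:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
df = pd.DataFrame({'t': [1] * 20,
                   'x1': rng.normal(size=20),
                   'x2': rng.normal(size=20)})
df['g1'] = df.groupby('t', group_keys=False)['x1'].apply(
    lambda s: pd.qcut(s, 2, labels=['L', 'H']))

# independent: x2 bins come from the whole cross section
df['g2_ind'] = df.groupby('t', group_keys=False)['x2'].apply(
    lambda s: pd.qcut(s, 2, labels=['L', 'H']))
# dependent: x2 bins are formed within each g1 bucket
df['g2_dep'] = df.groupby(['t', 'g1'], group_keys=False, observed=True)['x2'].apply(
    lambda s: pd.qcut(s, 2, labels=['L', 'H']))

print(pd.crosstab(df['g1'], df['g2_dep']))  # dependent sort: 5/5 within each g1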
Example 13
    from dataset import MRIDataset as DATA
else:
    from dataset import MRIDataset_threechannel as DATA

if args.network == 'Inception_v3':
    from Inception_v3 import inception_v3_pretrain as MODEL

Transform = transforms.Compose(
    [transforms.Resize((SIZE, SIZE)),
     transforms.ToTensor()])

if __name__ == '__main__':
    # writer = SummaryWriter(path_to_logs_dir)
    dataset = DATA(path_to_data,
                   path_to_label,
                   mode=MODE,
                   transform=Transform,
                   aug=True)
    weight = 1. / torch.tensor([dataset.negative, dataset.positive],
                               dtype=torch.float)
    target = torch.tensor(dataset._label['label'], dtype=torch.long)
    sample_weight = torch.tensor([weight[t] for t in target],
                                 dtype=torch.float)
    sampler = WeightedRandomSampler(sample_weight, len(sample_weight))
    dataloader = DataLoader(dataset,
                            Batch_size,
                            sampler=sampler,
                            num_workers=1,
                            drop_last=True)

    dataset_test = DATA(path_to_testdata,
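The WeightedRandomSampler in Example 13 draws samples with probability inversely proportional to class frequency, so minority (positive) scans appear in batches roughly as often as negatives. A self-contained sketch of the same pattern, with a hypothetical 8:2 label split:

import torch
from torch.utils.data import WeightedRandomSampler

target = torch.tensor([0] * 8 + [1] * 2)          # 8 negatives, 2 positives
class_count = torch.bincount(target).float()      # tensor([8., 2.])
weight = 1.0 / class_count                        # inverse class frequency
sample_weight = weight[target]                    # one weight per sample
sampler = WeightedRandomSampler(sample_weight, num_samples=len(sample_weight))
drawn = list(sampler)                             # indices, drawn with replacement
print(torch.bincount(target[drawn]))              # roughly balanced class counts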