    def do_fit(
        self,
        dataframe: pd.DataFrame,
        labeled_segments: List[AnalyticSegment],
        deleted_segments: List[AnalyticSegment],
        learning_info: LearningInfo
    ) -> None:
        data = utils.cut_dataframe(dataframe)
        data = data['value']
        self.state.pattern_center = list(set(self.state.pattern_center + learning_info.segment_center_list))
        self.state.pattern_model = utils.get_av_model(learning_info.patterns_list)
        convolve_list = utils.get_convolve(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
        correlation_list = utils.get_correlation(self.state.pattern_center, self.state.pattern_model, data, self.state.window_size)
        height_list = learning_info.patterns_value

        del_conv_list = []
        delete_pattern_width = []
        delete_pattern_height = []
        delete_pattern_timestamp = []
        for segment in deleted_segments:
            delete_pattern_timestamp.append(segment.pattern_timestamp)
            deleted = utils.get_interval(data, segment.center_index, self.state.window_size)
            deleted = utils.subtract_min_without_nan(deleted)
            del_conv = scipy.signal.fftconvolve(deleted, self.state.pattern_model)
            if len(del_conv):
                del_conv_list.append(max(del_conv))
            delete_pattern_height.append(utils.find_confidence(deleted)[1])

        self._update_fiting_result(self.state, learning_info.confidence, convolve_list, del_conv_list, height_list)
Example #2
    def _n_fold_validation(self, train_data, train_target, n=10):

        n_samples = len(train_data)
        perm = np.random.permutation(n_samples)

        kendall_tau = np.full((n, len(self.model_pool)), np.nan)

        for i, tst_split in enumerate(np.array_split(perm, n)):
            trn_split = np.setdiff1d(perm, tst_split, assume_unique=True)

            # loop over all considered surrogate model in pool
            for j, model in enumerate(self.model_pool):

                acc_predictor = get_acc_predictor(model, train_data[trn_split],
                                                  train_target[trn_split])

                rmse, rho, tau = utils.get_correlation(
                    acc_predictor.predict(train_data[tst_split]),
                    train_target[tst_split])

                kendall_tau[i, j] = tau

        winner = int(
            np.argmax(
                np.mean(kendall_tau, axis=0) - np.std(kendall_tau, axis=0)))
        print("winner model = {}, tau = {}".format(
            self.model_pool[winner],
            np.mean(kendall_tau, axis=0)[winner]))
        self.winner = self.model_pool[winner]
        # re-fit the winner model with entire data
        acc_predictor = get_acc_predictor(self.model_pool[winner], train_data,
                                          train_target)
        self.model = acc_predictor
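
For reference, the `get_correlation` used in the surrogate-validation snippets above is expected to return an (RMSE, Spearman's rho, Kendall's tau) triple for predicted versus true targets. A minimal sketch of such a helper, assuming only NumPy and SciPy (illustrative, not the repository's actual implementation):

import numpy as np
from scipy import stats

def get_correlation(prediction, target):
    # Flatten so both 1-D vectors and (n, 1) column arrays are accepted.
    prediction = np.asarray(prediction, dtype=float).ravel()
    target = np.asarray(target, dtype=float).ravel()
    # Root-mean-square error between predictions and targets.
    rmse = np.sqrt(np.mean((prediction - target) ** 2))
    # Rank correlations: Spearman's rho and Kendall's tau.
    rho, _ = stats.spearmanr(prediction, target)
    tau, _ = stats.kendalltau(prediction, target)
    return rmse, rho, tau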
Example #3
    def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list,
               deleted_segments: list, learning_info: dict) -> None:
        data = utils.cut_dataframe(dataframe)
        data = data['value']
        window_size = self.state['WINDOW_SIZE']
        last_pattern_center = self.state.get('pattern_center', [])
        self.state['pattern_center'] = list(
            set(last_pattern_center + learning_info['segment_center_list']))
        self.state['pattern_model'] = utils.get_av_model(
            learning_info['patterns_list'])
        convolve_list = utils.get_convolve(self.state['pattern_center'],
                                           self.state['pattern_model'], data,
                                           window_size)
        correlation_list = utils.get_correlation(self.state['pattern_center'],
                                                 self.state['pattern_model'],
                                                 data, window_size)
        height_list = learning_info['patterns_value']

        del_conv_list = []
        delete_pattern_width = []
        delete_pattern_height = []
        delete_pattern_timestamp = []
        for segment in deleted_segments:
            del_min_index = segment.center_index
            delete_pattern_timestamp.append(segment.pattern_timestamp)
            deleted = utils.get_interval(data, del_min_index, window_size)
            deleted = utils.subtract_min_without_nan(deleted)
            del_conv = scipy.signal.fftconvolve(deleted,
                                                self.state['pattern_model'])
            if len(del_conv):
                del_conv_list.append(max(del_conv))
            delete_pattern_height.append(utils.find_confidence(deleted)[1])
            delete_pattern_width.append(utils.find_width(deleted, False))

        self._update_fiting_result(self.state, learning_info['confidence'],
                                   convolve_list, del_conv_list, height_list)
Example #4
    def do_fit(self, dataframe: pd.DataFrame,
               labeled_segments: List[AnalyticSegment],
               deleted_segments: List[AnalyticSegment],
               learning_info: LearningInfo) -> None:
        data = utils.cut_dataframe(dataframe)
        data = data['value']
        last_pattern_center = self.state.pattern_center
        self.state.pattern_center = utils.remove_duplicates_and_sort(
            last_pattern_center + learning_info.segment_center_list)
        self.state.pattern_model = utils.get_av_model(
            learning_info.patterns_list)
        convolve_list = utils.get_convolve(self.state.pattern_center,
                                           self.state.pattern_model, data,
                                           self.state.window_size)
        correlation_list = utils.get_correlation(self.state.pattern_center,
                                                 self.state.pattern_model,
                                                 data, self.state.window_size)

        del_conv_list = []
        delete_pattern_timestamp = []
        for segment in deleted_segments:
            del_mid_index = segment.center_index
            delete_pattern_timestamp.append(segment.pattern_timestamp)
            deleted_pat = utils.get_interval(data, del_mid_index,
                                             self.state.window_size)
            deleted_pat = utils.subtract_min_without_nan(deleted_pat)
            del_conv_pat = scipy.signal.fftconvolve(deleted_pat,
                                                    self.state.pattern_model)
            if len(del_conv_pat):
                del_conv_list.append(max(del_conv_pat))

        self.state.convolve_min, self.state.convolve_max = utils.get_min_max(
            convolve_list, self.state.window_size / 3)
        self.state.conv_del_min, self.state.conv_del_max = utils.get_min_max(
            del_conv_list, self.state.window_size)
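
The anomaly-detection `do_fit` methods above call `utils.get_correlation` with a different signature: segment centers, the averaged pattern model, the value series, and a window size. Under the assumption that it returns one Pearson correlation per labeled window, a rough sketch could look like this (hypothetical helper, not the project's code):

import numpy as np

def get_correlation(segments, av_model, data, window_size):
    # Pearson correlation between the averaged pattern and the window
    # around each labeled segment center; truncated edge windows are skipped.
    values = np.asarray(data, dtype=float)
    av_model = np.asarray(av_model, dtype=float)
    correlation_list = []
    for center in segments:
        window = values[max(center - window_size, 0):center + window_size + 1]
        if len(window) != len(av_model):
            continue
        correlation_list.append(np.corrcoef(window, av_model)[0, 1])
    return correlation_list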
Example #5
    def do_scatter(i, j, ax):
        """ Draw single scatter plot
        """
        xs, ys = utils.extract(i, j, steadies)
        ax.scatter(xs, ys)

        ax.set_xlabel(r"$S_%d$" % i)
        ax.set_ylabel(r"$S_%d$" % j)

        cc = utils.get_correlation(xs, ys)
        ax.set_title(r"Corr: $%.2f$" % cc)
Example #7
def validate(net, data, target, device):
    net.eval()

    with torch.no_grad():
        data, target = data.to(device), target.to(device)
        pred = net(data)
        pred, target = pred.cpu().detach().numpy(), target.cpu().detach().numpy()

        rmse, rho, tau = get_correlation(pred, target)

    # print("Validation RMSE = {:.4f}, Spearman's Rho = {:.4f}, Kendall’s Tau = {:.4f}".format(rmse, rho, tau))
    return rmse, rho, tau, pred, target
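
A quick way to exercise the snippet above (hypothetical model and random tensors, assuming `get_correlation` returns the (RMSE, rho, tau) triple described earlier):

import torch

# Smoke test on random data with a plain linear layer as the "predictor".
net = torch.nn.Linear(16, 1)
data = torch.randn(64, 16)
target = torch.randn(64, 1)
rmse, rho, tau, pred, true = validate(net, data, target, device='cpu')
print("RMSE = {:.4f}, rho = {:.4f}, tau = {:.4f}".format(rmse, rho, tau))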
Example #8
def optimize_for_sharpe_ratio(allocation, start_value, df, symbols):
    symbols = symbols[1:]
    dr = utils.daily_returns(df)
    df = df[symbols]
    # print df
    corr = np.asarray(utils.get_correlation(dr))
    # NOTE: the computed correlations are immediately overridden by hard-coded values below.
    corr = np.asarray([1, -.12, .00209, -.004, -.007, .005])
    # print symbols
    cons = ({'type': 'eq', 'fun': lambda x:  1 - sum(x)})
    bnds = tuple((0, 1) for item in allocation)
    result = op.minimize(__get_sharpe_ratio, allocation, args=(df, symbols, start_value,corr), method='SLSQP', bounds=bnds,
                         constraints=cons)

    print "Correlation to SPY: ", corr[1:]

    return np.round(result.x, 2)
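
The SLSQP setup above, weights bounded to [0, 1] and constrained to sum to 1, can be reproduced in isolation. A self-contained sketch on toy returns (illustrative helper `negative_sharpe`, not the module's `__get_sharpe_ratio`):

import numpy as np
import scipy.optimize as op

def negative_sharpe(weights, daily_returns):
    # Negate the Sharpe ratio so that minimizing it maximizes the ratio.
    portfolio = daily_returns.dot(weights)
    return -portfolio.mean() / portfolio.std()

rng = np.random.default_rng(0)
toy_returns = rng.normal(0.0005, 0.01, size=(250, 4))  # 250 days, 4 assets
allocation = np.full(4, 0.25)
cons = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)},)
bnds = tuple((0, 1) for _ in allocation)
result = op.minimize(negative_sharpe, allocation, args=(toy_returns,),
                     method='SLSQP', bounds=bnds, constraints=cons)
print(np.round(result.x, 2))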
Example #9
    def do_fit(self, dataframe: pd.DataFrame, labeled_segments: list, deleted_segments: list, learning_info: dict) -> None:
        data = utils.cut_dataframe(dataframe)
        data = data['value']
        last_pattern_center = self.state.get('pattern_center', [])
        self.state['pattern_center'] = list(set(last_pattern_center + learning_info['segment_center_list']))
        self.state['pattern_model'] = utils.get_av_model(learning_info['patterns_list'])
        convolve_list = utils.get_convolve(self.state['pattern_center'], self.state['pattern_model'], data, self.state['WINDOW_SIZE'])
        correlation_list = utils.get_correlation(self.state['pattern_center'], self.state['pattern_model'], data, self.state['WINDOW_SIZE'])

        del_conv_list = []
        delete_pattern_timestamp = []
        for segment in deleted_segments:
            del_mid_index = segment.center_index
            delete_pattern_timestamp.append(segment.pattern_timestamp)
            deleted_pat = utils.get_interval(data, del_mid_index, self.state['WINDOW_SIZE'])
            deleted_pat = utils.subtract_min_without_nan(deleted_pat)
            del_conv_pat = scipy.signal.fftconvolve(deleted_pat, self.state['pattern_model'])
            if len(del_conv_pat):
                del_conv_list.append(max(del_conv_pat))

        self.state['convolve_min'], self.state['convolve_max'] = utils.get_min_max(convolve_list, self.state['WINDOW_SIZE'] / 3)
        self.state['conv_del_min'], self.state['conv_del_max'] = utils.get_min_max(del_conv_list, self.state['WINDOW_SIZE'])
Example #10
    def do_fit(self, dataframe: pd.DataFrame,
               labeled_segments: List[AnalyticSegment],
               deleted_segments: List[AnalyticSegment],
               learning_info: LearningInfo) -> None:
        data = utils.cut_dataframe(dataframe)
        data = data['value']
        window_size = self.state.window_size
        last_pattern_center = self.state.pattern_center
        self.state.pattern_center = utils.remove_duplicates_and_sort(
            last_pattern_center + learning_info.segment_center_list)
        self.state.pattern_model = utils.get_av_model(
            learning_info.patterns_list)
        convolve_list = utils.get_convolve(self.state.pattern_center,
                                           self.state.pattern_model, data,
                                           window_size)
        correlation_list = utils.get_correlation(self.state.pattern_center,
                                                 self.state.pattern_model,
                                                 data, window_size)
        height_list = learning_info.patterns_value

        del_conv_list = []
        delete_pattern_timestamp = []
        for segment in deleted_segments:
            segment_cent_index = segment.center_index
            delete_pattern_timestamp.append(segment.pattern_timestamp)
            deleted_stair = utils.get_interval(data, segment_cent_index,
                                               window_size)
            deleted_stair = utils.subtract_min_without_nan(deleted_stair)
            del_conv_stair = scipy.signal.fftconvolve(deleted_stair,
                                                      self.state.pattern_model)
            if len(del_conv_stair) > 0:
                del_conv_list.append(max(del_conv_stair))

        self._update_fitting_result(self.state, learning_info.confidence,
                                    convolve_list, del_conv_list)
        self.state.stair_height = int(
            min(learning_info.pattern_height, default=1))
        self.state.stair_length = int(
            max(learning_info.pattern_width, default=1))
Example #11
def node_degree(data, bin_num_x=100, bin_num_y=100):
    """ Compare node degree and correlation
    """
    # get data
    ndegs = []
    avg_corrs = []
    node_num = -1
    for syst, mat, _ in data:
        graph = nx.DiGraph(syst.jacobian)
        for i in graph.nodes():
            ndegs.append(graph.degree(i))
            ncorrs = [abs(mat[i, j]) for j in graph.neighbors(i) if i != j]
            avg_corrs.append(
                np.mean(ncorrs) if len(ncorrs) > 0 else 0)
        node_num = graph.number_of_nodes()
    assert node_num >= 0, 'Invalid data found'

    # plot data
    heatmap, xedges, yedges = np.histogram2d(
        avg_corrs, ndegs,
        bins=(bin_num_x, bin_num_y))
    extent = [yedges[0], yedges[-1], xedges[0], xedges[-1]]
    heatmap = heatmap[::-1]
    plt.imshow(
        heatmap,
        extent=extent, interpolation='nearest',
        aspect=abs((extent[1]-extent[0])/(extent[3]-extent[2])))
    plt.colorbar()

    cc = get_correlation(ndegs, avg_corrs)
    plt.title(r'Corr: $%.2f$' % cc)

    plt.xlabel('node degree')
    plt.ylabel('average absolute correlation to other nodes')

    plt.tight_layout()
    save_figure('images/ndegree_corr.pdf', bbox_inches='tight')
    plt.close()
Example #13
    def search(self):

        if self.resume:
            archive = self._resume_from_dir()
        else:
            # the following lines corresponding to Algo 1 line 1-7 in the paper
            archive = [
            ]  # initialize an empty archive to store all trained CNNs

            # Design Of Experiment
            if self.iterations < 1:
                arch_doe = self.search_space.sample(self.n_doe)
            else:
                arch_doe = self.search_space.initialize(self.n_doe)

            # parallel evaluation of arch_doe
            top1_err, complexity = self._evaluate(arch_doe, it=0)

            # store evaluated / trained architectures
            for member in zip(arch_doe, top1_err, complexity):
                archive.append(member)

        # reference point (nadir point) for calculating hypervolume
        ref_pt = np.array(
            [np.max([x[1] for x in archive]),
             np.max([x[2] for x in archive])])

        # main loop of the search
        for it in range(1, self.iterations + 1):

            # construct accuracy predictor surrogate model from archive
            # Algo 1 line 9 / Fig. 3(a) in the paper
            acc_predictor, a_top1_err_pred = self._fit_acc_predictor(archive)

            # search for the next set of candidates for high-fidelity evaluation (lower level)
            # Algo 1 line 10-11 / Fig. 3(b)-(d) in the paper
            candidates, c_top1_err_pred = self._next(archive, acc_predictor,
                                                     self.n_iter)

            # high-fidelity evaluation (lower level)
            # Algo 1 line 13-14 / Fig. 3(e) in the paper
            c_top1_err, complexity = self._evaluate(candidates, it=it)

            # check for accuracy predictor's performance
            rmse, rho, tau = get_correlation(
                np.vstack((a_top1_err_pred, c_top1_err_pred)),
                np.array([x[1] for x in archive] + c_top1_err))

            # add to archive
            # Algo 1 line 15 / Fig. 3(e) in the paper
            for member in zip(candidates, c_top1_err, complexity):
                archive.append(member)

            # calculate hypervolume
            hv = self._calc_hv(
                ref_pt,
                np.column_stack(
                    ([x[1] for x in archive], [x[2] for x in archive])))

            # print iteration-wise statistics
            print("Iter {}: hv = {:.2f}".format(it, hv))
            print(
                "fitting {}: RMSE = {:.4f}, Spearman's Rho = {:.4f}, Kendall’s Tau = {:.4f}"
                .format(self.predictor, rmse, rho, tau))

            # dump the statistics
            with open(os.path.join(self.save_path, "iter_{}.stats".format(it)),
                      "w") as handle:
                json.dump(
                    {
                        'archive': archive,
                        'candidates': archive[-self.n_iter:],
                        'hv': hv,
                        'surrogate': {
                            'model': self.predictor,
                            'name': acc_predictor.name,
                            'winner': (acc_predictor.winner
                                       if self.predictor == 'as'
                                       else acc_predictor.name),
                            'rmse': rmse,
                            'rho': rho,
                            'tau': tau
                        }
                    }, handle)
            if _DEBUG:
                # plot
                plot = Scatter(legend={'loc': 'lower right'})
                F = np.full((len(archive), 2), np.nan)
                F[:, 0] = np.array([x[2] for x in archive])  # second obj. (complexity)
                F[:, 1] = 100 - np.array([x[1] for x in archive])  # top-1 accuracy
                plot.add(F,
                         s=15,
                         facecolors='none',
                         edgecolors='b',
                         label='archive')
                F = np.full((len(candidates), 2), np.nan)
                F[:, 0] = np.array(complexity)
                F[:, 1] = 100 - np.array(c_top1_err)
                plot.add(F, s=30, color='r', label='candidates evaluated')
                F = np.full((len(candidates), 2), np.nan)
                F[:, 0] = np.array(complexity)
                F[:, 1] = 100 - c_top1_err_pred[:, 0]
                plot.add(F,
                         s=20,
                         facecolors='none',
                         edgecolors='g',
                         label='candidates predicted')
                plot.save(
                    os.path.join(self.save_path, 'iter_{}.png'.format(it)))

        return
Example #14
def main_run():

    # h = [.01,  .01,   .01,.01,  .01,.01,  .01,.01]
    # # h = pd.Series([0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025],dtype=float)
    #
    # # x = [3.,  2.,   1.5,2.,  1.5,3.,  4.5,4.9]
    # # y = [4.2,1.9,1.9,2.1,2.0  ,2.5,3.9,4.5]
    #
    # x = [1.5,  2.,   1.5,2.,  1.5,3.,  4.5,4.9]
    # y = [4.2,1.9,1.9,2.1,2.0  ,2.5,3.9,4.5]
    # z = [0.012,0.032,0.035,0.902,0.052,0.302,-0.909,0.902]
    # # df.corr()
    # b = [1.,  -1.,   -1.,1.,  1.,-1.,  1,-1]
    # p = [0.12,0.55,0.45,0.3,0.53,0.57,0.19,0.58]

    start_value = 1000000
    # symbols = ['AC','ALI','BDO','BPI','DMC','GLO',
    #            'GTCAP','HCP','JFC','MBT','MPI','MEG',
    #            'RLC','SECB','SM','SMPH','TEL','URC']
    # symbols = ['AAPL','IBM','XOM','GLD']
    # symbols = ['goog','aapl','msft','amzn']
    symbols = ['AAPL', 'MSFT','ORCL','QCOM','BBY','MU','GILD','YUM','NFLX','VZ','APA','RRC','MDLZ','CSCO','V','MET','SBUX','GGP','UA','GM']
    h = np.ones(len(symbols))

    addressable_dates = pd.date_range('2012-01-01','2015-12-31')
    df = utils.get_data_online(symbols, addressable_dates)
    df = pd.DataFrame(df,index=df.index,dtype=np.float64)
    df = df.dropna()

    x = np.asarray(utils.daily_returns(df[symbols[1:]]).std())
    # print x
    y = np.asarray(utils.daily_returns(df[symbols[1:]]).mean())
    # print y
    # b = [1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.]
    z = np.asarray(utils.get_correlation(df)[1:])
    # print z
    # p = [.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5]
    b = np.ones(len(symbols) - 1)
    # print b
    p = Accuracy.get_accuracy(symbols,addressable_dates)
    # print p
    sharpe_ratios = utils.get_sharpe_ratio(utils.daily_returns(df[symbols[1:]]))
    # print sharpe_ratios

    data = (x,y,z,b,p)


    x_sorted = np.sort(x)
    y_sorted = np.sort(y)

    # print "h",np.transpose( h) * y

    # print "cost", error(h)

    # cons = ({'type':'eq', 'fun': lambda x: 1 - sum(x)})
    cons = ({'type': 'eq', 'fun': lambda x:  1 - sum(x)})

    # bounds = tuple((0,1) for it in h)
    bnds = tuple((0,1) for it in h)
    bounds = bnds
    # bounds = (0,1)

    min_result = spo.minimize(error,h,args=(data,), method='SLSQP',bounds=bounds, constraints=cons, options={'disp':True})

    print "Parameters = {}, Y = {}".format(np.round(min_result.x,2), np.abs( min_result.fun)) # for tracing

    # print "xdata",xdata
    model_bounds = max(x.max(),y.max())
    xdata = np.linspace(0,5,8)

    y_main = func(xdata,model_bounds,1,-model_bounds)


    print "bias", b * np.round(min_result.x,3)
    print "risk: ", x
    print "rewards: ", y
    print "correlation: ", z
    print "accuracy: ", p

    print "y main",y_main
    plt.plot(xdata,y_main)


    y = func(xdata,2.5,1.3,0.5)
    ydata = y + 0.2 * np.random.normal(size=len(xdata))


    coeffs, pcov = curve_fit(func,xdata,ydata)

    yaj = func(xdata, coeffs[0], coeffs[1], coeffs[2])

    print(pcov)


    # plt.scatter(xdata,ydata)
    # plt.plot(xdata,yaj)

    # c,p = curve_fit(sigmoid,x_sorted,y_sorted)


    plt.scatter(x_sorted,y_sorted)



    plt.show()





    return None
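
Finally, the portfolio code passes a DataFrame (or its daily returns) to `utils.get_correlation` and uses every entry except the first (`corr[1:]`, printed as "Correlation to SPY"). That usage is consistent with a helper that correlates each column against the first one, for example (sketch, assuming pandas):

import pandas as pd

def get_correlation(df):
    # Correlation of every column against the first column (e.g. the SPY benchmark).
    return df.corr().iloc[:, 0].values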