# NOTE: the numpy/scikit-learn imports below are assumed; the project-local
# helper modules (ju for JSON I/O, du for date utilities, cu for CSV utilities,
# fi for feature metadata, wfcvutils for walk-forward cross validation) and the
# classifier-type constants (linsvm, rbfsvm, knn, ada, rf, dt) are expected to
# be imported/defined elsewhere in the project.
import numpy as np
from sklearn import svm, tree, ensemble, metrics
from sklearn import linear_model as linmod
from sklearn import neighbors as neigh
from sklearn import feature_selection as fs


def run_script(pdate_str, busjson, revjson, tipjson, senticsv, outfile):
    # convert pdate to seconds since the epoch
    pdate = du.date2int(du.str2date(pdate_str))

    # load business objects
    print('loading business objects from %s...' % busjson)
    all_buses, junk = ju.load_objects(busjson)

    # load review objects
    print('loading review objects from %s...' % revjson)
    all_reviews, junk = ju.load_objects(revjson)

    # load tip objects
    print('loading tip objects from %s...' % tipjson)
    all_tips, junk = ju.load_objects(tipjson)

    # load sentiment ranking data derived from tip and review data
    print('loading sentiment rankings from %s...' % senticsv)
    all_senti = cu.load_matrix(senticsv, has_hdr=False)

    # generate a data set for the specified prediction date
    print('generating data set for prediction date %s...' % pdate_str)
    buses = du.gen_dataset(pdate, all_buses, all_reviews, all_tips, all_senti)

    # write the generated data set to file
    print('writing generated data set to %s...' % outfile)
    ju.save_objects(buses, outfile)
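
# Example usage (hypothetical): generating a data-set snapshot for a single
# prediction date. The file paths and date string below are placeholders, and
# the date format is whatever du.str2date() expects.
#
#   run_script('2014-06-01',
#              'businesses.json', 'reviews.json', 'tips.json',
#              'sentiment_rankings.csv', 'dataset_2014-06-01.json')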

def run_script(busjson, revjson, tipjson, senticsv, init_pdate, delta,
               ctype=linsvm, usamp=True, binary=None, rfe=False, pca=-1,
               reg=False, feat_info=fi.data_feat_info, states=None):
    print('Initial prediction date: %s' % init_pdate)
    print('Time delta: %d months' % delta)
    if (states):
        print('limiting data to restaurants in: %s' % str(states))

    # convert pdate to seconds since the epoch
    pdate = du.date2int(du.str2date(init_pdate))

    # load business objects
    print('loading business objects from %s...' % busjson)
    all_buses, junk = ju.load_objects(busjson)

    # load review objects
    print('loading review objects from %s...' % revjson)
    all_reviews, junk = ju.load_objects(revjson)

    # load tip objects
    print('loading tip objects from %s...' % tipjson)
    all_tips, junk = ju.load_objects(tipjson)

    # load sentiment ranking data derived from tip and review data
    print('loading sentiment rankings from %s...' % senticsv)
    all_senti = cu.load_matrix(senticsv, has_hdr=False)

    # select the estimator; where supported, reduce the number of features
    # using recursive feature elimination (RFE)
    # - See http://scikit-learn.org/stable/auto_examples/plot_rfe_with_cross_validation.html#example-plot-rfe-with-cross-validation-py
    # - See http://stackoverflow.com/questions/23815938/recursive-feature-elimination-and-grid-search-using-scikit-learn
    if (reg):
        # create the least squares linear regressor
        print('using least squares linear regression...')
        c = linmod.LinearRegression()
        # grid search not supported for linear regression (???)
        param_grid = None
    elif (ctype == rbfsvm):
        # create RBF SVM to test
        #c = svm.NuSVC(kernel='rbf')
        c = svm.SVC(kernel='rbf')
        # configure parameter grid for grid search
        C_range = 10.0 ** np.arange(-3, 5)
        gamma_range = 10.0 ** np.arange(-4, 3)
        if (rfe):
            print('RFE not currently supported for RBF SVM...')
            #c = fs.RFECV(c, step=1)
            #pgrid = []
            #for C in C_range:
            #    for gamma in gamma_range:
            #        pgrid.append({'C':C, 'gamma':gamma})
            #pgrid = [{'gamma':0.5},{'gamma':0.1},{'gamma':0.01},{'gamma':0.001},{'gamma':0.0001}]
            #param_grid = {'estimator_params': pgrid}
        print('using RBF SVM...')
        param_grid = dict(gamma=gamma_range, C=C_range)
    elif (ctype == knn):
        # create a KNN classifier
        c = neigh.KNeighborsClassifier()
        if (rfe):
            print('RFE not currently supported for k-nearest neighbors...')
        print('using k-nearest neighbors...')
        param_grid = {'n_neighbors': [1,2,3,4,5,6,7,8,9,10,15,20,25,30],
                      'weights': ['uniform','distance'],
                      'p': [1,2,3,4,5,6,7,8,9,10]}
    elif (ctype == ada):
        # create boosted classifier
        c = ensemble.AdaBoostClassifier()
        if (rfe):
            print('RFE not currently supported for AdaBoost...')
        print('using AdaBoost...')
        param_grid = {'n_estimators': [5, 10, 25, 40, 50, 60, 75, 85, 100],
                      'learning_rate': [0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0]}
    elif (ctype == rf):
        # create random forest classifier
        c = ensemble.RandomForestClassifier()
        if (rfe):
            print('RFE not currently supported for random forest...')
        print('using random forest...')
        param_grid = {'n_estimators': [5, 10, 25, 40, 50, 60, 75, 85, 100],
                      'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, None]}
    elif (ctype == dt):
        # create decision tree classifier
        c = tree.DecisionTreeClassifier()
        # max feats - subtract 1 because data feats includes the class label
        if (rfe):
            print('RFE not supported with decision trees...')
        print('using decision tree...')
        param_grid = {'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, None]}
    else:
        # create linear SVM to test
        c = svm.LinearSVC()
        # configure parameter grid for grid search
        C_range = 10.0 ** np.arange(-3, 5)
        if (rfe):
            print('using linear SVM with RFE...')
            c = fs.RFECV(c, step=1)
            pgrid = []
            for C in C_range:
                pgrid.append({'C': C})
            #pgrid = [{'C':0.01},{'C':0.1},{'C':1},{'C':10},{'C':100},{'C':1000},{'C':10000}]
            param_grid = {'estimator_params': pgrid}
        else:
            print('using linear SVM...')
            param_grid = {'C': C_range}

    # run the walk-forward cross validation and collect the results
    print('run walk-forward cross validation...')
    if (usamp):
        print(' under-sampling still-open class...')
    else:
        print(' NOT under-sampling still-open class...')
    results = wfcvutils.wfcv(c, param_grid, all_buses, all_reviews, all_tips,
                             all_senti, pdate, delta*du.month, pca=pca,
                             usamp=usamp, binary=binary, reg=reg,
                             feat_info=feat_info, states=states)

    # combine the results from all folds to produce overall metrics
    y_true = None
    y_pred = None
    for r in results:
        if (y_true is None):
            y_true = r[0]
        else:
            y_true = np.hstack((y_true, r[0]))
        if (y_pred is None):
            y_pred = r[1]
        else:
            y_pred = np.hstack((y_pred, r[1]))

    # print out an overall classification report
    print('\n=========================================')
    print('Overall metrics for all prediction dates:\n')
    if (len(results) != 0):
        if (reg):
            wfcvutils.print_reg_metrics(y_true, y_pred)
        else:
            cm = metrics.confusion_matrix(y_true, y_pred)
            wfcvutils.print_cm(cm)
            #print(metrics.classification_report(y_true, y_pred, target_names=fi.class_names))
    else:
        print(' NO RESULTS\n')
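
# Example usage (hypothetical): a walk-forward experiment with the default
# linear SVM, under-sampling the still-open class and stepping forward by a
# 6-month delta per fold. The paths, date string, and state list below are
# placeholders, not values from this project.
#
#   run_script('businesses.json', 'reviews.json', 'tips.json',
#              'sentiment_rankings.csv', '2013-01-01', 6,
#              ctype=linsvm, usamp=True, states=['AZ', 'NV'])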