def main():
    testset = pd.read_csv(path + "test_x.csv", index_col=0)
    ## deal with the NAs, and add features
    #train.feature_eng(test)

    ## predict
    print "Loading the predict_model classifier.."
    tstart = datetime.now()
    classifier = data_io.load_model("predict_model")
    print "Time used", datetime.now() - tstart

    print "Making predictions on the predict_model"
    tstart = datetime.now()
    fnames = ['year', 'month', 'trade_no', 'sigungu_no', 'price', 'monthly_expense']
    test_f = testset[fnames].values
    predic_proba = classifier.predict_proba(test_f)[:, 1]
    print "Time used", datetime.now() - tstart

    ## Making prediction
    prediction = zip(testset['year'], testset['month'], testset['trade_no'],
                     testset['sigungu_no'], testset['price'],
                     testset['monthly_expense'], predic_proba)

    print "Writing predictions to file.."
    tstart = datetime.now()
    data_io.write_submission(prediction)
    print "Time used,", datetime.now() - tstart
def main():
    print("Reading in the training data")
    data = data_io.get_train_df()

    print("Extracting features")
    feature_extractor = Vectorizer(MAX_FEATURES)
    category_vectorizer = DictVectorizer()
    #category_title = pd.get_dummies(train['Title'])
    #print(category_vectorizer.shape, X.shape)
    X = form_input(data, feature_extractor, category_vectorizer)
    #location = pd.get_dummies(train['LocationNormalized'])
    #X = hstack((X, location))
    #contract_time = pd.get_dummies(train['ContractTime'])
    #X = hstack((X, contract_time))
    #print(X)
    y = data["SalaryNormalized"]

    print("Training model")
    linreg.train(X, y)

    print("Making predictions")
    predictions = linreg.predict(X)
    mae_train = metrics.MAE(predictions, data["SalaryNormalized"])
    print('MAE train=%s' % mae_train)

    print("Validating...")
    data = data_io.get_valid_df()
    X = form_input(data, feature_extractor, category_vectorizer, train=False)
    predictions = linreg.predict(X)
    data_io.write_submission(predictions)
def main():
    print "sklearn version", pkg_resources.get_distribution("scikit-learn").version
    print "numpy version", pkg_resources.get_distribution("numpy").version
    print "pandas version", pkg_resources.get_distribution("pandas").version

    print("Loading the classifier")
    clf = data_io.load_model()

    X = data_io.load_matlab_valid_features()
    if X is None:
        # bail out early if no feature file was found
        print("No feature file found!")
        exit(1)
    X = delete_unused_columns(X)
    X = X.fillna(0)

    print_importances(X, clf, 0.0)
    print("Predictions outcomes with shape: " + str(X.shape))
    print clf

    predictions = clf.predict(X)
    #predictions = clf.predict_pruned(X, 3000)
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def main():
    print("Getting features for valid papers from the database")
    if os.path.exists("features_valid.obj"):
        with open("features_valid.obj", 'rb') as loadfile:
            data = cPickle.load(loadfile)
    else:
        data = data_io.get_features_db("ValidPaper")
        with open("features_valid.obj", 'wb') as dumpfile:
            cPickle.dump(data, dumpfile, protocol=cPickle.HIGHEST_PROTOCOL)

    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]

    print("Loading the classifier")
    classifier = data_io.load_model(prefix="forest_")

    print("Making predictions")
    predictions = classifier.predict_proba(features)[:, 1]
    predictions = list(predictions)

    author_predictions = defaultdict(list)
    paper_predictions = {}

    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    for author_id in sorted(author_predictions):
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions, prefix="forest_")
def recommendationNewPOI(self, submission_path):
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}

    user_visited = defaultdict(list)
    for entry in csv.reader(open(self.trdata_path)):
        uid, pid1, pid2 = int(entry[0]), int(entry[1]), int(entry[4])
        user_visited[uid].append(pid1)
        user_visited[uid].append(pid2)

    for i, entry in enumerate(csv.reader(open(self.tedata_path))):
        uid, pid1 = int(entry[0]), int(entry[1])
        near_grids = getNearGridsForPOI(self.pois_latlng[pid1], ndimx, ndimy, True)
        candidate_pois = []
        for grididx in near_grids:
            candidate_pois += self.grids_pois[grididx[0]][grididx[1]]
        pois_score = []
        # score only POIs the user has not visited yet
        for c_pid in set(candidate_pois) - set(user_visited[uid]):
            if self.bias_tag == True:
                score = np.dot(self.user_factor[self.user_ids[uid]]
                               + self.query_factor[self.poi_ids[pid1]],
                               self.poi_factor[self.poi_ids[c_pid]]) \
                        + self.poi_bias[self.poi_ids[c_pid]]
            else:
                score = np.dot(self.user_factor[self.user_ids[uid]]
                               + self.query_factor[self.poi_ids[pid1]],
                               self.poi_factor[self.poi_ids[c_pid]])
            pois_score.append([c_pid, score])
        result = sorted(pois_score, key=lambda x: x[1], reverse=True)[:settings["MAX_TOPK"]]
        recommendation_result[i] = [pair[0] for pair in result]
        sys.stdout.write("\rFINISHED RECOMMENDATION TRIPLE NUM: %d. " % (i + 1))
        sys.stdout.flush()
    write_submission(recommendation_result, submission_path)
def recommendation(self, submission_path):
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}
    cache_user_poi_score = defaultdict(dict)

    for i, entry in enumerate(csv.reader(open(self.tedata_path))):
        uid, pid1 = int(entry[0]), int(entry[1])
        near_grids = getNearGridsForPOI(self.pois_latlng[pid1], ndimx, ndimy, True)
        candidate_pois = []
        for grididx in near_grids:
            candidate_pois += self.grids_pois[grididx[0]][grididx[1]]
        pois_score = []
        for c_pid in candidate_pois:
            if uid in cache_user_poi_score and c_pid in cache_user_poi_score[uid]:
                # reuse the cached score so cached POIs still enter the ranking
                pois_score.append([c_pid, cache_user_poi_score[uid][c_pid]])
            else:
                if self.bias_tag == True:
                    score = np.dot(self.user_factor[self.user_ids[uid]],
                                   self.poi_factor[self.poi_ids[c_pid]]) \
                            + self.poi_bias[self.poi_ids[c_pid]]
                else:
                    score = np.dot(self.user_factor[self.user_ids[uid]],
                                   self.poi_factor[self.poi_ids[c_pid]])
                pois_score.append([c_pid, score])
                cache_user_poi_score[uid][c_pid] = score
        result = sorted(pois_score, key=lambda x: x[1], reverse=True)[:settings["MAX_TOPK"]]
        recommendation_result[i] = [pair[0] for pair in result]
        sys.stdout.write("\rFINISHED PAIR NUM: %d. " % (i + 1))
        sys.stdout.flush()
    write_submission(recommendation_result, submission_path)
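# The scores above are plain matrix-factorization dot products with an optional
# POI bias term. A toy numeric illustration with made-up factors (not the
# trained model):
import numpy as np

user_vec = np.array([0.5, 1.0])   # latent factors of one user
poi_vec = np.array([0.2, 0.4])    # latent factors of one candidate POI
poi_bias = 0.3

score_without_bias = np.dot(user_vec, poi_vec)           # 0.5*0.2 + 1.0*0.4 = 0.5
score_with_bias = np.dot(user_vec, poi_vec) + poi_bias   # 0.5 + 0.3 = 0.8
print(score_without_bias, score_with_bias)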
def recommendation(self, submission_path):
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}

    for i, entry in enumerate(csv.reader(open(self.tedata_path))):
        uid, pid1 = int(entry[0]), int(entry[1])
        near_grids = getNearGridsForPOI(self.pois_latlng[pid1], ndimx, ndimy, True)
        pois_score = []
        for grididx in near_grids:
            for candidate_poi in self.grids_pois[grididx[0]][grididx[1]]:
                if candidate_poi in self.per_pois_pop[uid]:
                    pois_score.append([candidate_poi, self.per_pois_pop[uid][candidate_poi]])
                else:
                    pois_score.append([candidate_poi, 0])
        result = sorted(pois_score, key=lambda x: x[1], reverse=True)[:settings["MAX_TOPK"]]
        recommendation_result[i] = [pair[0] for pair in result]
        print i
    write_submission(recommendation_result, submission_path)
def main():
    print("Reading the test data")
    test = data_io.read_test()

    print("Reading in the meta data")
    paper_author, paper_author_indexed = f.get_paper_author()

    print("Computing Relational Information")
    computed_features = f.get_all_computed_features(paper_author)

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    predictions = []
    for author_id, row in test.iterrows():
        features = []
        paper_ids = []
        for paper_id in row["PaperIds"]:
            s = f.get_features(paper_id, author_id, paper_author_indexed, computed_features)
            if s is None:
                print("Error at Author Id %d And Paper Id %d" % (author_id, paper_id))
            else:
                features.append(s)
                paper_ids.append(paper_id)
        feature_matrix = pd.DataFrame(features)
        preds = classifier.predict_proba(feature_matrix)[:, 1]
        # rank only the papers that actually received a prediction
        paper_ids_sorted = sorted(zip(preds, paper_ids), reverse=True)
        print(paper_ids_sorted)
        predictions.append([x[1] for x in paper_ids_sorted])

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def main():
    print "Getting features for valid papers from the database"
    data = data_io.get_features_db("ValidPaper")
    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]

    print "Loading the classifier"
    classifier = data_io.load_model()

    print "Making predictions"
    predictions = classifier.predict_proba(features)[:, 1]
    predictions = list(predictions)

    author_predictions = defaultdict(list)
    paper_predictions = {}

    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    for author_id in sorted(author_predictions):
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print "Writing predictions to file"
    data_io.write_submission(paper_predictions)
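# The per-author grouping and ranking above recurs throughout these scripts.
# A minimal self-contained sketch with toy ids, showing what write_submission
# receives: a dict mapping each author to papers ranked by descending score.
from collections import defaultdict

toy_author_paper_ids = [(1, 10), (1, 11), (2, 20)]
toy_predictions = [0.2, 0.9, 0.5]

grouped = defaultdict(list)
for (a_id, p_id), pred in zip(toy_author_paper_ids, toy_predictions):
    grouped[a_id].append((pred, p_id))

ranked = {a_id: [p_id for _, p_id in sorted(pairs, reverse=True)]
          for a_id, pairs in grouped.items()}
print(ranked)  # {1: [11, 10], 2: [20]}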
def runWithoutWndchrm(self):
    print "Loading the classifier"
    classifier = data_io.load_model()
    imageCollections = data_io.get_valid_df()
    featureGetter = FeatureGetter()

    print "Getting the features"
    fileName = data_io.get_savez_name_test()
    if not self.load:
        # last features calculated from candidates
        (namesObservations, coordinates, valid) = Utils.calculateFeatures(fileName, featureGetter, imageCollections)
    else:
        (namesObservations, coordinates, valid) = Utils.loadFeatures(fileName)

    print "Making predictions"
    #valid = normalize(valid, axis=0)
    predictions = classifier.predict(valid)
    predictions = predictions.reshape(len(predictions), 1)

    print "Writing predictions to file"
    data_io.write_submission(namesObservations, coordinates, predictions)
    data_io.write_submission_nice(namesObservations, coordinates, predictions)

    print "Calculating final results"
    return Predictor.finalResults(namesObservations, predictions, coordinates)
def main():
    print("Getting features for valid papers from the database")
    data = data_io.get_features_db("ValidPaper")
    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]

    print("Loading the classifier")
    classifier = data_io.load_model()
    print classifier.feature_importances_

    print("Making predictions")
    predictions = classifier.predict_proba(features)[:, 1]
    predictions = list(predictions)

    author_predictions = defaultdict(list)
    paper_predictions = {}

    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    for author_id in sorted(author_predictions):
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions)
def main():
    print("Reading test data")
    test_chunks = data_io.read_test_features()
    test = pandas.concat([chunk for chunk in test_chunks], ignore_index=True)
    feature_names = list(test.columns)
    #feature_names.remove("date_time")
    features = test[feature_names].values

    print("Loading the classifier")
    classifiers = data_io.load_model()

    print("Making predictions")
    #orig_predictions = classifier.predict_proba(features)
    #multiplier = 2 ** classifier.classes_
    #predictions = orig_predictions * multiplier
    #predictions = predictions.sum(axis=1)
    predictions = class_probabilities(features, classifiers)
    print predictions
    predictions = list(-1.0 * predictions)

    recommendations = zip(test["srch_id"], test["prop_id"], predictions)

    print("Writing predictions to file")
    data_io.write_submission(recommendations)
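# class_probabilities is not defined in this section. Judging from the
# commented-out lines above, each classifier's class probabilities are collapsed
# into an expected relevance with a 2 ** class weighting; averaging over the
# ensemble is an assumption. A hypothetical sketch, not the original helper:
import numpy as np

def class_probabilities(features, classifiers):
    # assumes sklearn-style classifiers exposing predict_proba and ordinal classes_
    scores = np.zeros(len(features))
    for clf in classifiers:
        proba = clf.predict_proba(features)               # (n_samples, n_classes)
        scores += (proba * (2.0 ** clf.classes_)).sum(axis=1)
    return scores / len(classifiers)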
def main():
    print("Getting features for valid papers from the database")
    data = data_io.get_features_db("ValidPaper")
    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]
    featuresfloat = []
    for tup in features:
        a, b, c, d, e = tup
        featuresfloat.append((float(a), float(b), float(c), float(d), float(e)))
    print("Total number of samples: ", len(featuresfloat))

    print("Loading the logistic regression model")
    logistic = data_io.load_model()

    print("Making predictions")
    predictions = logistic.predict_proba(featuresfloat)[:, 1]
    predictions = list(predictions)

    author_predictions = defaultdict(list)
    paper_predictions = {}

    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    for author_id in sorted(author_predictions):
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-tv', type=float, action='store', dest='threshold_val',
                        help='specify how to generate recommendation result.')
    parser.add_argument('-t', type=int, action='store', dest='target',
                        help='for validation or test dataset')
    if len(sys.argv) != 5:
        print 'Command e.g.: python predict.py -tv 0.8 -t 0(1)'
        sys.exit(1)

    para = parser.parse_args()
    if para.target == 0:
        file_name = settings["MTLR_TEST_FILE"]
        gbt_feature_file = settings["MTLR_FEATURE_FILE"]
    elif para.target == 1:
        file_name = settings["MTLR_TEST_FILE_FOR_SUBMIT"]
        gbt_feature_file = settings["MTLR_FEATURE_FILE_FOR_SUBMIT"]

    writer = csv.writer(open(gbt_feature_file, "w"), lineterminator="\n")
    classifier = data_io.load_model(settings["MTLR_MODEL_FILE"])
    #print classifier.coef_
    #raw_input()

    user_recommend_result = defaultdict(list)
    finished_num = 0
    features = []
    user_product_ids = []
    cache_uid = -1
    for i, entry in enumerate(csv.reader(open(file_name))):
        feature = map(float, entry[2:])
        uid, pid = map(int, entry[:2])
        if i == 0:
            cache_uid = uid
        if uid != cache_uid:
            predictions = classifier.predict_proba(user_product_ids, features)
            #predictions = classifier.predict(features)
            for (t_uid, t_pid), pred in zip(user_product_ids, predictions):
                writer.writerow([t_uid, t_pid, pred])
                if pred > para.threshold_val:
                    user_recommend_result[t_uid].append(t_pid)
            features = [feature]
            user_product_ids = [[uid, pid]]
            cache_uid = uid
            finished_num += 1
            #print("FINISHED UID NUM: %d. " % (finished_num))
            #sys.stderr.write("\rFINISHED UID NUM: %d. " % (finished_num))
            #sys.stderr.flush()
        else:
            features.append(feature)
            user_product_ids.append([uid, pid])

    # score the final user's batch left over after the loop
    if features:
        predictions = classifier.predict_proba(user_product_ids, features)
        for (t_uid, t_pid), pred in zip(user_product_ids, predictions):
            writer.writerow([t_uid, t_pid, pred])
            if pred > para.threshold_val:
                user_recommend_result[t_uid].append(t_pid)

    data_io.write_submission(user_recommend_result)
def main():
    submission_path = data_io.get_paths()["submission_path"]
    reader = csv.reader(open(submission_path))
    reader.next()  # skip the header
    recommendations = [(int(row[0]), int(row[1]), -i) for i, row in enumerate(reader)]
    out_path = submission_path[:-4] + "Reversed.csv"
    data_io.write_submission(recommendations, submission_path=out_path)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-Csampling', type=str, action='store', dest='sample_method',
                        help='specify which sampling method.\n'
                             'Currently including three sampling method:\t1.uniform\n\t'
                             '2.adaptive pairwise sampling')
    parser.add_argument('-Cbehavior', type=str, action='store', dest='behavior_num',
                        help='specify whether to utilize various behaviours of users')
    parser.add_argument('-Init', type=str, action='store', dest='init_choice',
                        help='specify which method to initialize model parameters')
    parser.add_argument('-Retrain', type=str, action='store', dest='retrain_choice',
                        help='specify which method to initialize model parameters')
    parser.add_argument('-topk', type=int, action='store', dest='topk',
                        help='specify how many products to be recommended')
    if len(sys.argv) != 11:
        print 'Command e.g.: python train.py -Retrain True -Init zero(gaussian) ' \
            + '-Csampling uniform(adaptive) -Cbehavior triple(tuple) -topk 4'
        sys.exit(1)

    para = parser.parse_args()
    #genTrainFile(para.behavior_num)
    #genTrainFile1(para.behavior_num)
    genTrainFile2()
    #bpr = BPR()
    #bpr1 = BPR()
    bpr2 = BPR()
    if para.retrain_choice == "True":
        bpr2.model_init(settings["BPR_TRAIN_FILE"], para.init_choice)
        bpr2.train()
        recommend_result = bpr2.genRecommendResult(True, para.topk,
                                                   settings["BPR_TRAIN_FILE"], para.init_choice)
        write_submission(recommend_result)
    else:
        recommend_result = bpr2.genRecommendResult(False, para.topk,
                                                   settings["BPR_TRAIN_FILE"], para.init_choice)
        write_submission(recommend_result)
def main():
    print("Loading the model")
    model = data_io.load_model()

    print("Making predictions")
    valid = data_io.get_valid_df()
    predictions = model * np.ones(len(valid))

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def main():
    print("Reading test data")
    test = data_io.read_test()
    ordinals = np.arange(len(test))
    recommendations = zip(test["srch_id"], test["prop_id"], ordinals)

    print("Writing predictions to file")
    data_io.write_submission(recommendations, "testOrderBenchmark.csv")
def main():
    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    test = data_io.get_test()
    predictions = classifier.predict(test)

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def main():
    print("Reading the test data")
    test = data_io.read_test()

    print("Making predictions")
    np.random.seed(12341234)
    predictions = test.apply(shuffle, axis=1)

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def main():
    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    valid = data_io.get_valid_df()
    predictions = classifier.predict(valid)
    predictions = np.rint(predictions)  # round predictions to the nearest integer

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def main():
    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    valid = data_io.get_valid_df()
    predictions = classifier.predict(valid)
    predictions = predictions.reshape(len(predictions), 1)

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def predict(self, submission_path):
    prediction_result = []
    for line in open(self.tedata_path):
        uid, pid = line.strip("\r\t\n").split(" ")[:2]
        if self.m_choice == 0:
            prediction_result.append([uid, pid, self.ave_rating[uid]])
        elif self.m_choice == 1:
            prediction_result.append([uid, pid, self.ave_rating[pid]])
        else:
            print 'Invalid choice of average rating method!'
            sys.exit(1)
    write_submission(prediction_result, submission_path)
def main():
    valid = data_io.get_valid_df()
    P = {}
    for key in valid:
        print("Loading the classifier for %s" % key)
        classifier = data_io.load_model(key)

        print("Making predictions")
        P[key] = classifier.predict(valid[key])
        P[key] = P[key].reshape(len(P[key]), 1)

    print("Writing predictions to file")
    data_io.write_submission(P)
def main():
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()
    conn = data_io.get_db_conn()
    feature_name = open("feature_list.txt").read().split()
    # if size < len(feature_name):
    #     to be done!

    for table_name in ["ValidPaper"]:
        if rank > 0:
            # getting features by parallel computing
            print "getting features at node " + str(rank)
            feature = data_io_parallel.get_features_db_parallel(conn, rank, table_name, feature_name[rank - 1])
        else:
            feature = data_io_parallel.get_trained_validation_data(conn, table_name)
        # sending features to rank 0
        print "sending features to node " + str(rank)
        features = comm.gather(feature, root=0)
        #print features
        if rank == 0:
            temp = []
            for f in features:
                temp.extend(f)
            print "Successfully got the features from " + table_name
            data = map(list, np.array(temp).T)

    if rank == 0:
        author_paper_ids = [x[:2] for x in data]
        features = [x[2:] for x in data]

        print("Loading the classifier")
        classifier = data_io.load_model()
        print classifier.feature_importances_

        print("Making predictions")
        predictions = classifier.predict_proba(features)[:, 1]
        predictions = list(predictions)

        author_predictions = defaultdict(list)
        paper_predictions = {}

        for (a_id, p_id), pred in zip(author_paper_ids, predictions):
            author_predictions[a_id].append((pred, p_id))

        for author_id in sorted(author_predictions):
            paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
            paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

        print("Writing predictions to file")
        data_io.write_submission(paper_predictions)
        print "Prediction completed, exit..."
        comm.Abort()
def main():
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    predictions = classifier.predict(valid)
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)
def run(self):
    valid = self.getValidationDataset()
    if f.preprocessedFeatures != []:
        intermediate = data_io.read_intermediate_valid()
        for i in f.preprocessedFeatures:
            valid[i] = intermediate[i]

    print "Loading the classifier"
    classifier = data_io.load_model()

    print "Making predictions"
    predictions = classifier.predict(valid)
    predictions = predictions.flatten()

    print "Writing predictions to file"
    data_io.write_submission(predictions)
def main():
    cf = ClassifierFactory()
    filename = None
    modelnames = ["basic_python_benchmark"]
    numRows = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f:m:n:h")
    except getopt.GetoptError as err:
        print str(err)
        sys.exit(2)
    for o, a in opts:
        if o == "-f":
            filename = a
        elif o == "-n":
            numRows = int(a)
        elif o == "-m":
            if a == "all":
                modelnames = []
                for clf_key in cf.get_all_keys():
                    modelnames.append(clf_key)
            elif cf.is_valid_key(a):
                modelnames = [a]
        elif o == "-h":
            print 'options:'
            print "\t -m [classifier key | all]"
            print "\t -f [filename]"
            sys.exit(0)
        else:
            print "try help: python predict.py -h"
            sys.exit(1)

    print "Reading the test pairs"
    test = data_io.read_test_pairs(numRows)
    testInfo = data_io.read_test_info(numRows)
    test['A type'] = testInfo['A type']
    test['B type'] = testInfo['B type']

    for modelname in modelnames:
        print "Loading the classifier:", cf.get_classifier_name(modelname)
        classifier = data_io.load_model(modelname)

        print "Making predictions"
        predictions = classifier.predict(test)
        predictions = predictions.flatten()

        filename = modelname + '.csv'
        data_io.write_submission(predictions, filename)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', type=int, action='store', dest='target',
                        help='for validation or test dataset')
    parser.add_argument('-tv', type=float, action='store', dest='threshold_val',
                        help='specify threshold value.')
    parser.add_argument('-s1', type=float, action='store', dest='click_score',
                        help='specify the score of user click behavior.')
    parser.add_argument('-s2', type=float, action='store', dest='collect_score',
                        help='specify the score of user collect behavior.')
    parser.add_argument('-s3', type=float, action='store', dest='buy_score',
                        help='specify the score of user buy behavior.')
    parser.add_argument('-ta', type=float, action='store', dest='time_alpha',
                        help='specify the decay parameter for training')
    parser.add_argument('-tb', type=float, action='store', dest='time_beta',
                        help='specify the decay parameter for prediction')
    parser.add_argument('-month', type=int, action='store', dest='month',
                        help='specify the month when the recommendation being generated.')
    parser.add_argument('-day', type=int, action='store', dest='day',
                        help='specify the day when the recommendation being generated.')
    if len(sys.argv) != 19:
        print 'Command e.g.: python itemcf.py -t (1) -tv 1.5 -s1 1 -s2 2 -s3 4 -ta 0.5 -tb 0.5 -month 7 -day 15'
        sys.exit(1)

    para = parser.parse_args()
    if para.target == 0:
        data = [entry for entry in csv.reader(open(settings["TRAIN_DATA_FILE"]))]
    elif para.target == 1:
        data = [entry for entry in csv.reader(open(settings["TAR_DATA_FILE"]))]
    else:
        print 'Invalid train data target choice...'
        sys.exit(1)

    data = [map(int, entry) for entry in data[1:]]
    user_behavior = getUserBehavior(data)
    user_inverted_index = createdInvertedIndex(data)
    #user_inverted_index = createdInvertedIndex1(data)
    #sim_items = itemSimilarity(user_inverted_index, para.click_score,
    #        para.collect_score, para.buy_score, para.time_alpha)
    sim_items = itemSimilarity1(user_inverted_index, para.click_score,
                                para.collect_score, para.buy_score, para.time_alpha)
    #recommend_result = genRecommendResult(sim_items, user_behavior, para.click_score,
    #        para.collect_score, para.buy_score, para.time_alpha, para.threshold_val,
    #        para.month, para.day)
    recommend_result = genRecommendResult1(sim_items, user_behavior, para.click_score,
                                           para.collect_score, para.buy_score, para.time_beta,
                                           para.threshold_val, para.month, para.day, para.target)
    write_submission(recommend_result)
def recommendation(self, submission_path):
    index_extent = (-90, -180, 90, 180)
    ndimx = int((index_extent[3] - index_extent[1]) / settings["GRID_LNG"])
    ndimy = int((index_extent[2] - index_extent[0]) / settings["GRID_LAT"])
    recommendation_result = {}

    for i, entry in enumerate(csv.reader(open(self.tedata_path))):
        uid, pid1 = int(entry[0]), int(entry[1])
        near_grids = getNearGridsForPOI(self.pois_latlng[pid1], ndimx, ndimy, True)
        candidate_pois = []
        for grididx in near_grids:
            candidate_pois += self.grids_pois[grididx[0]][grididx[1]]
        pois_score = [[poi, self.pois_popularity[poi]] for poi in candidate_pois]
        result = sorted(pois_score, key=lambda x: x[1], reverse=True)[:settings["MAX_TOPK"]]
        recommendation_result[i] = [pair[0] for pair in result]
        print i
    write_submission(recommendation_result, submission_path)
def main():
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    valid_info = data_io.read_valid_info()
    valid = pd.concat([valid, valid_info], axis=1)
    valid = train.get_types(valid)

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    predictions = classifier.predict(valid)
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions, fn)
def main():
    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Loading the test data and cleaning it..")
    test = data_io.get_test_df()
    test = FeatureConverter().clean_data(test)
    passengerIds = test['Id']
    test.drop(['Id'], axis=1, inplace=True)
    test = test.values

    print("Making predictions")
    predictions = classifier.predict(test).astype(int)
    #predictions = predictions.reshape(len(predictions), 1)

    print("Writing predictions to file")
    data_io.write_submission(predictions, passengerIds, ['Id', 'Cover_Type'])
def main():
    print("Getting features for valid papers from the database")
    if os.path.exists("features_valid.obj"):
        with open("features_valid.obj", 'rb') as loadfile:
            data = cPickle.load(loadfile)
    else:
        data = data_io.get_features_db("ValidPaper")
        with open("features_valid.obj", 'wb') as dumpfile:
            cPickle.dump(data, dumpfile, protocol=cPickle.HIGHEST_PROTOCOL)

    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]
    predictInts = []
    for tup in features:
        a, b, c, d, e = tup
        predictInts.append((int(a), int(b), int(c), int(d), int(e)))

    print("Loading the classifier")
    mlp = data_io.load_model(prefix="mlp_")

    print("Making predictions")
    predictions = []
    for x in predictInts:
        # propagate the inputs forward to compute the outputs
        outp = list(x)  # output of the input layer, used as input to the next layer
        for layer in mlp.layers[1:]:  # all layers starting from the second layer
            for i in range(layer.nNeurons):
                layer.net[i] = weightedSum(outp, layer.W[1:, i]) + layer.W[0, i]
                # pass the weighted sum through this layer's transfer function
                layer.out[i] = g(layer.net[i], layer.transferF)
            outp = layer.out
        predictions.append(mlp.layers[-1].out[0])

    author_predictions = defaultdict(list)
    paper_predictions = {}

    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    for author_id in sorted(author_predictions):
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions, prefix="mlp_")
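# The nested loops above are a standard feed-forward pass: net = bias + weighted
# sum of the previous layer's outputs, then the layer's transfer function.
# A vectorized numpy sketch with made-up layer shapes, for illustration only;
# it assumes weights are stored as in layer.W, with the bias in row 0.
import numpy as np

def forward(x, layers):
    out = np.asarray(x, dtype=float)
    for W, transfer in layers:          # each layer: (weights with bias row, activation)
        net = W[0] + out.dot(W[1:])     # bias + weighted sums for all neurons at once
        out = transfer(net)
    return out

rng = np.random.RandomState(0)
toy_layers = [(rng.randn(3, 3), np.tanh),   # 2 inputs -> 3 hidden neurons
              (rng.randn(4, 1), np.tanh)]   # 3 hidden neurons -> 1 output
print(forward([0.5, -1.0], toy_layers))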
def main():
    print("Getting features for valid papers from the database")
    if os.path.exists("features_valid.obj"):
        with open("features_valid.obj", 'rb') as loadfile:
            data = cPickle.load(loadfile)
    else:
        data = data_io.get_features_db("ValidPaper")
        with open("features_valid.obj", 'wb') as dumpfile:
            cPickle.dump(data, dumpfile, protocol=cPickle.HIGHEST_PROTOCOL)

    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]

    # code for including the keyword-match feature
    print "adding additional features..."
    import additional_features as af
    all_features = af.get_additional_features()
    _, _, kw_features = all_features
    for i in range(len(features)):
        features[i] += tuple(kw_features[i][2:])

    featuresnp = np.array(features, dtype='int32')
    # featuresnp -= np.mean(featuresnp, axis=0)
    # featuresnp /= np.std(featuresnp, axis=0)

    print("Loading the classifier")
    classifier = data_io.load_model(prefix="forest_")

    print("Making predictions")
    predictions = classifier.predict_proba(featuresnp)[:, 1]
    predictions = list(predictions)

    author_predictions = defaultdict(list)
    paper_predictions = {}

    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    for author_id in sorted(author_predictions):
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions, prefix="forest_")
def predict_write(data, predict_type):
    author_paper_ids = [x[:2] for x in data]
    features = [x[2:] for x in data]

    print("Loading the classifier")
    classifier = data_io.load_model()

    print("Making predictions")
    features = np.array(features)  # xgboost expects an array, not a list of tuples
    predictions = classifier.predict_proba(features)[:, 1]
    predictions = list(predictions)

    author_predictions = defaultdict(list)
    paper_predictions = {}

    if predict_type == "valid":
        targetset = pd.read_csv('dataRev2/Valid.csv')
    else:
        targetset = pd.read_csv('dataRev2/Test.csv')
    parsed_counter = parse_targetset_maintain_duplicate(targetset)

    for (a_id, p_id), pred in zip(author_paper_ids, predictions):
        author_predictions[a_id].append((pred, p_id))

    for author_id in sorted(author_predictions):
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        new_result = []
        for x in paper_ids_sorted:
            pid = x[1]
            for i in range(parsed_counter[author_id, pid]):
                new_result.append(pid)
        paper_predictions[author_id] = new_result
        #paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]
        paper_predictions[author_id] = processDuplicates(paper_predictions[author_id])

    print("Writing predictions to file")
    data_io.write_submission(paper_predictions, predict_type)
def main():
    test = data_io.read_test()
    ## deal with the NAs, and add features
    train.feature_eng(test)

    ## predict the booking_bool
    print("Loading the Booking classifier..")
    tstart = datetime.now()
    classifier = data_io.load_model(True)
    print("Time used,")
    print(datetime.now() - tstart)

    print("Making predictions on the booking_bool..")
    tstart = datetime.now()
    b_fnames = train.get_features(test, True)
    b_test_f = test[b_fnames].values
    b_prob = classifier.predict_proba(b_test_f)[:, 1]
    b_prob = -1.0 * b_prob  # keep as a numpy array so the combination below is elementwise
    print("Time used,")
    print(datetime.now() - tstart)

    ## predict the click_bool
    print("Loading the Click classifier..")
    tstart = datetime.now()
    classifier = data_io.load_model(False)
    print("Time used,")
    print(datetime.now() - tstart)

    print("Making predictions on the click_bool..")
    tstart = datetime.now()
    c_fnames = train.get_features(test, False)
    c_test_f = test[c_fnames].values
    c_prob = classifier.predict_proba(c_test_f)[:, 1]
    c_prob = -1.0 * c_prob
    print("Time used,")
    print(datetime.now() - tstart)

    ## Making Recommendations
    recommendations = zip(test["srch_id"], test["prop_id"], 4 * b_prob + c_prob)

    print("Writing predictions to file..")
    tstart = datetime.now()
    data_io.write_submission(recommendations)
    print("Time used,")
    print(datetime.now() - tstart)
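# The weighted combination above only works on numpy arrays; with Python lists,
# 4 * b_prob + c_prob repeats and concatenates instead of adding elementwise,
# so b_prob and c_prob must stay numpy arrays. A two-line illustration:
import numpy as np

print(4 * [0.1, 0.2] + [0.3, 0.4])                       # list repetition + concatenation
print(4 * np.array([0.1, 0.2]) + np.array([0.3, 0.4]))   # elementwise: [0.7 1.2]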
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-topk', type=int, action='store', dest='topk',
                        help='specify the number of products to be recommended to '
                             'users, 0 stands for using user personal average.')
    if len(sys.argv) != 3:
        print 'Command e.g.: python runPopularity -topk 5'
        sys.exit(1)

    para = parser.parse_args()
    products = genPopularList(settings["TRAIN_DATA_FILE"])
    user_average_buy = getAverageUserBuy(para.topk)
    recommend_result = genRecommendResult(products, user_average_buy)
    write_submission(recommend_result)
def main():
    print("Reading the valid pairs")
    valid = data_io.read_valid_pairs()
    features = fe.feature_extractor()

    print("Transforming features")
    trans_valid = features.fit_transform(valid)
    trans_valid = np.nan_to_num(trans_valid)

    print("Saving Valid Features")
    data_io.save_valid_features(trans_valid)

    print("Loading the classifier")
    #(both_classifier, A_classifier, B_classifier, none_classifier) = data_io.load_model()
    classifier = data_io.load_model()

    print("Making predictions")
    valid_info = data_io.read_valid_info()
    predictions = list()
    curr_pred = None
    """
    for i in range(len(trans_valid)):
        if valid_info["A type"][i] == "Numerical" and valid_info["B type"][i] == "Numerical":
            curr_pred = both_classifier.predict_proba(trans_valid[i, :])
        elif valid_info["A type"][i] == "Numerical" and valid_info["B type"][i] != "Numerical":
            curr_pred = A_classifier.predict_proba(trans_valid[i, :])
        elif valid_info["A type"][i] != "Numerical" and valid_info["B type"][i] == "Numerical":
            curr_pred = B_classifier.predict_proba(trans_valid[i, :])
        else:
            curr_pred = none_classifier.predict_proba(trans_valid[i, :])
        predictions.append(curr_pred[0][2] - curr_pred[0][0])
    """
    orig_predictions = classifier.predict_proba(trans_valid)
    predictions = orig_predictions[:, 2] - orig_predictions[:, 0]
    predictions = predictions.flatten()

    print("Writing predictions to file")
    data_io.write_submission(predictions)