import warnings
from pylab import *
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    import pandas as pd
    import numpy as np
    from feature_extraction.dataLoader import DataLoader
    from feature_extraction import calcFeatures
    from utilities.plotBucket import plotBucket
    from utilities.plotBucket import plotBar
    from utilities.plotBucket import plotFrequencyHistogram
    import math
    import random

loader = DataLoader()
loader.loadAll(distance=False)

print "Calculating Features"
calcFeatures.calcAuthorsPastPapers(loader)
calcFeatures.calcTopConfsJoursCount(loader)
calcFeatures.computeAverages(loader)

df = pd.read_pickle("savedFrames/predictionFeatures/paperTable")

# Bucket the explanatory feature into equal-frequency (percentile) buckets
# and compare the average of the target across buckets.
exp = 'maxTopPaperCount'
target = 'avgRating'
numBuckets = 7
percentiles = (100.0 / numBuckets) * np.arange(numBuckets + 1)
buckets = np.percentile(df[exp].values, percentiles.tolist())
buckets[0] = -1  # lower the first edge so the minimum value lands inside the first bucket
averages = []
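# Illustrative continuation (a sketch, not the original script): one way the
# empty `averages` list could be filled and visualized.  plotBucket's exact
# signature is not shown above, so plain matplotlib is used here instead.
import matplotlib.pyplot as plt

for low, high in zip(buckets[:-1], buckets[1:]):
    # Papers whose feature value falls in (low, high]; average their target value.
    inBucket = df[(df[exp] > low) & (df[exp] <= high)]
    averages.append(inBucket[target].mean())

plt.bar(np.arange(numBuckets), averages)
plt.xlabel(exp + ' bucket')
plt.ylabel('mean ' + target)
plt.show()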
import warnings
import numpy as np
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    from pandas import DataFrame
    from feature_extraction.dataLoader import DataLoader

loader = DataLoader()
loader.loadAll()

reviewTable = []
metaReviewTable = []
bidTable = []
paperTable = []
userTable = []

for id, review in loader.reviews.iteritems():
    # Distances between the reviewer and each author of the reviewed paper.
    maxDist = 7
    sumDist = 0
    dists = []
    for author in review.paper.authors:
        if author.id in review.user.distances:
            dist = review.user.distances[author.id]
            sumDist += dist
            dists.append(dist)
        else:
            # Distance to this author is unknown; fall back to the cap maxDist.
            sumDist += maxDist
    dists.sort()
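    # Illustrative continuation (a sketch, not the original code): summarize the
    # reviewer-author distances for this review.  The field names are hypothetical.
    minDist = dists[0] if dists else maxDist
    avgDist = float(sumDist) / max(len(review.paper.authors), 1)
    reviewTable.append({'review': id, 'minDist': minDist, 'avgDist': avgDist})

# Hypothetical: collect the per-review rows into a DataFrame for later analysis.
reviewDf = DataFrame(reviewTable)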
import warnings
from feature_extraction import calcFeatures
import numpy as np
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    from pandas import DataFrame
    from feature_extraction.dataLoader import DataLoader
    from feature_extraction.tfIdf import tf_idf

loader = DataLoader()
loader.loadAll()

tfidf = tf_idf()
tfidf.store_tf_idf(loader)

print "Calculating Features"
calcFeatures.calcAuthorsPastPapers(loader)
calcFeatures.calcTopConfsJoursCount(loader)
calcFeatures.computeAverages(loader)
calcFeatures.computeDistances(loader.reviews)

print "Constructing Paper Table"
paperTable = []
for id, paper in loader.papers.iteritems():
    # Pick, per paper, the author with the most past papers, the most
    # top-venue papers, and the most past KDD papers.
    maxAuthor = sorted(paper.authors, key=lambda a: len(a.pastPapers))[-1]
    maxTopAuthor = sorted(paper.authors, key=lambda a: a.topPastPapers)[-1]
    maxKDDAuthor = sorted(paper.authors, key=lambda a: a.topKDDPast)[-1]
    numAuthors = len(paper.authors)
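    # Illustrative continuation (a sketch, not the original code): one plausible
    # per-paper feature row.  The column names are assumptions, apart from
    # maxTopPaperCount and the pickle path, which the bucket-analysis script
    # above reads back.
    paperTable.append({
        'paper': id,
        'numAuthors': numAuthors,
        'maxPastPapers': len(maxAuthor.pastPapers),
        'maxTopPaperCount': maxTopAuthor.topPastPapers,
        'maxKDDPapers': maxKDDAuthor.topKDDPast,
    })

# Hypothetical: persist the table where the bucket-analysis script expects it.
DataFrame(paperTable).to_pickle("savedFrames/predictionFeatures/paperTable")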