Esempio n. 1
0
import warnings
from pylab import *
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    import pandas as pd
    import numpy as np
from feature_extraction.dataLoader import DataLoader
from feature_extraction import calcFeatures
from utilities.plotBucket import plotBucket
from utilities.plotBucket import plotBar
from utilities.plotBucket import plotFrequencyHistogram
import math
import random

# Build the loader and pull in the data set.
# NOTE(review): distance=False presumably skips loading distance data --
# confirm against DataLoader.loadAll.
loader = DataLoader()
loader.loadAll(distance=False)

print("Calculating Features")
# Apply each feature pass to the loader, in the same order as before.
for _featurePass in (
        calcFeatures.calcAuthorsPastPapers,
        calcFeatures.calcTopConfsJoursCount,
        calcFeatures.computeAverages):
    _featurePass(loader)

# Column whose values are bucketed, and the column named as the target.
# `target` is not read anywhere in the visible part of the script.
# NOTE(review): `exp` rebinds a name that `from pylab import *` may also
# provide -- keep in mind when reading later code.
exp = 'maxTopPaperCount'
target = 'avgRating'
numBuckets = 7

# Previously pickled paper-level frame.
df = pd.read_pickle("savedFrames/predictionFeatures/paperTable")

# numBuckets + 1 evenly spaced percentile cut points from 0 to 100.
percentiles = np.arange(numBuckets + 1) * (100.0 / numBuckets)

# Values of df[exp] at those percentiles become the bucket edges.
buckets = np.percentile(df[exp].values, percentiles.tolist())

# Overwrite the lowest edge with -1 (presumably so that the minimum value
# falls strictly above the first edge -- confirm against the bucketing code).
buckets[0] = -1

# Starts empty; nothing in the visible code appends to it.
averages = []
import warnings
import numpy as np

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    from pandas import DataFrame

from feature_extraction.dataLoader import DataLoader

# Build the loader and load everything with loadAll's default arguments.
loader = DataLoader()
loader.loadAll()

# Five independent empty lists, one per table name. The visible code does
# not fill them; presumably they collect rows for frames built later.
reviewTable, metaReviewTable, bidTable, paperTable, userTable = (
    [], [], [], [], [])

# For each review, gather the reviewer's recorded distance to every author
# of the reviewed paper.
# NOTE(review): the loop variable `id` shadows the builtin of the same name.
for id, review in loader.reviews.iteritems():
    # Fallback added to the sum when the reviewer has no entry for an author.
    maxDist = 7
    sumDist = 0
    dists = []
    for author in review.paper.authors:
        if author.id not in review.user.distances:
            # Unknown distance: contributes to the sum only, not to dists.
            sumDist += maxDist
            continue
        dist = review.user.distances[author.id]
        sumDist += dist
        dists.append(dist)
    # Known distances in ascending order.
    dists = sorted(dists)
import warnings
from feature_extraction import calcFeatures
import numpy as np

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    from pandas import DataFrame

from feature_extraction.dataLoader import DataLoader
from feature_extraction.tfIdf import tf_idf

# Build the loader and load everything with loadAll's default arguments.
loader = DataLoader()
loader.loadAll()

# Create the tf-idf helper and let it store its results for the loaded data.
tfidf = tf_idf()
tfidf.store_tf_idf(loader)

print("Calculating Features")
# Apply each loader-wide feature pass, in the same order as before.
for _featurePass in (
        calcFeatures.calcAuthorsPastPapers,
        calcFeatures.calcTopConfsJoursCount,
        calcFeatures.computeAverages):
    _featurePass(loader)
# This pass is handed the reviews collection rather than the loader itself.
calcFeatures.computeDistances(loader.reviews)

print("Constructing Paper Table")
# Starts empty; the code that fills it is outside the visible portion.
paperTable = []

for id, paper in loader.papers.iteritems():

    maxAuthor = sorted(paper.authors, key=lambda a: len(a.pastPapers))[-1]
    maxTopAuthor = sorted(paper.authors, key=lambda a: a.topPastPapers)[-1]
    maxKDDAuthor = sorted(paper.authors, key=lambda a: a.topKDDPast)[-1]
    numAuthors = len(paper.authors)