Esempio n. 1
0
def applymodel(modelpath, metadatapath, outpath):
    """Apply a pickled model and write the resulting table to ``outpath``.

    ``modelpath`` and ``metadatapath`` are handed straight to
    ``train.apply_pickled_model`` together with a fixed source folder and
    the '.tsv' file extension. The object it returns is saved by calling
    its ``to_csv`` method with ``outpath``.
    """
    predictions = train.apply_pickled_model(
        modelpath,
        '/Users/tunder/Dropbox/GenreProject/python/reception/poetryEF/fromEF',
        '.tsv',
        metadatapath,
    )
    print('Got predictions for that model.')
    predictions.to_csv(outpath)
def applymodel():
    """Prompt for a model path, apply that model, and save the predictions.

    The metadata is read from 'mergedmeta.csv' and the result returned by
    ``train.apply_pickled_model`` is written back to that same filename.
    """
    metadata_file = 'mergedmeta.csv'
    chosen_model = input('Path to model? ')
    predictions = train.apply_pickled_model(
        chosen_model,
        '/Users/tunder/Dropbox/GenreProject/python/reception/fiction/fromEF',
        '.tsv',
        metadata_file,
    )
    print('Got predictions for that model.')
    # The output path is the same file the metadata was read from.
    predictions.to_csv(metadata_file)
Esempio n. 3
0
def applymodel():
    """Interactively apply a pickled model and store its predictions.

    Asks the user for the path to a model, runs
    ``train.apply_pickled_model`` on the '.tsv' files in a fixed source
    folder using 'mergedmeta.csv' as metadata, and then overwrites
    'mergedmeta.csv' with the returned table.
    """
    user_model = input('Path to model? ')

    tsv_folder = '/Users/tunder/Dropbox/GenreProject/python/reception/fiction/fromEF'
    meta_in = meta_out = 'mergedmeta.csv'

    result_table = train.apply_pickled_model(user_model, tsv_folder, '.tsv', meta_in)
    print('Got predictions for that model.')
    result_table.to_csv(meta_out)
Esempio n. 4
0
def applymodel(modelpath, metapath, outpath):
    ''' This function applies a specified model (modelpath) to a specified
    metadata set (metapath), and sends the results to outpath.

    modelpath -- path of the pickled model, passed to
                 train.apply_pickled_model.
    metapath  -- path of the metadata file to use. If it is empty or None,
                 '../metadata/balanced_character_subset.csv' is used.
    outpath   -- destination passed to the result's to_csv method.
    '''

    sourcefolder = '/Users/tunder/data/character_subset/'
    extension = '.tsv'

    # The metadata path used to be overwritten unconditionally with the
    # balanced-subset file, which silently ignored the caller's argument.
    # The hard-coded file is now only a fallback when none is supplied.
    metadatapath = metapath or '../metadata/balanced_character_subset.csv'

    newmetadict = train.apply_pickled_model(modelpath, sourcefolder, extension,
                                            metadatapath)
    print('Got predictions for that model.')
    newmetadict.to_csv(outpath)
Esempio n. 5
0
        featurestart = 3000
        featureend = 4400
        featurestep = 50
        genre_gridsearch('alldetective', c_range, featurestart, featureend, featurestep, positive_tags)

    elif args[1] == 'compare':
        # This routine assumes that you have already trained models for classes A and B.
        # It compares them.
        firstmodel = args[2]
        secondmodel = args[3]
        firstpath = '/Users/tunder/Dropbox/fiction/results/' + firstmodel + '.pkl'
        secondpath = '/Users/tunder/Dropbox/fiction/results/' + secondmodel + '.pkl'
        metadatapath = '/Users/tunder/Dropbox/fiction/meta/concatenatedmeta.csv'
        sourcefolder = '/Users/tunder/Dropbox/fiction/newtsvs'
        extension = '.tsv'
        firstonall = train.apply_pickled_model(firstpath, sourcefolder, extension, metadatapath)
        # firstonall.set_index('docid', inplace = True)
        secondonall = train.apply_pickled_model(secondpath, sourcefolder, extension, metadatapath)
        # secondonall.set_index('docid', inplace = True)
        firstonself = pd.read_csv('/Users/tunder/Dropbox/fiction/results/' + firstmodel + '.csv', index_col = 'volid')
        secondonself = pd.read_csv('/Users/tunder/Dropbox/fiction/results/' + secondmodel + '.csv', index_col = 'volid')

        firsttotal, firstright = comparison(firstonself, secondonall, secondmodel)
        secondtotal, secondright = comparison(secondonself, firstonall, firstmodel)

        print(firsttotal, firstright)
        print("Accuracy of " + secondmodel + " on volumes originally included in "+ firstmodel + ": " + str(firstright/firsttotal))
        print(secondtotal, secondright)
        print("Accuracy of " + firstmodel + " on volumes originally included in "+ secondmodel + ": " + str(secondright/secondtotal))
        print("Total accuracy: ", (firstright + secondright) / (firsttotal + secondtotal))
Esempio n. 6
0
def futurepredict():
    """Compare each model's own predictions with those of the preceding model.

    For five iterations and for centers 1930, 1940, 1950 and 1960, two model
    names are built: "SF<iteration>-<center-30>-<center>" (the first model)
    and "SF<iteration>-<center>-<center+30>" (the second model). The pickled
    first model is applied through ``train.apply_pickled_model``, and the
    second model's saved results are read from
    '../modeloutput/<secondmodel>.csv', indexed by 'volid'.

    For every row of the second model's results whose 'dateused' lies in
    1930..1976 and whose 'realclass' is nonzero, the difference between the
    first model's prediction and the row's own 'logistic' value is recorded.
    Rows the first model has no prediction for are printed and skipped.

    The per-document differences are averaged and written, along with date,
    gender, author and title, to '../plotdata/sfsurprises.tsv'.
    """
    differences = dict()
    dates = dict()
    authors = dict()
    titles = dict()
    genders = dict()

    for iteration in range(5):
        print('ITERATION: ' + str(iteration))
        for center in range(1930, 1970, 10):
            print('CENTER: ' + str(center))
            floor = center - 30
            firstmodel = "SF" + str(iteration) + '-' + str(floor) + "-" + str(
                center)
            ceiling = center + 30
            secondmodel = "SF" + str(iteration) + '-' + str(
                center) + "-" + str(ceiling)

            firstpath = '../modeloutput/' + firstmodel + '.pkl'

            metadatapath = '../metadata/concatenatedmeta.csv'
            sourcefolder = '/Users/tunder/Dropbox/fiction/newtsvs'
            extension = '.tsv'
            firstonall = train.apply_pickled_model(firstpath, sourcefolder,
                                                   extension, metadatapath)

            secondonself = pd.read_csv('../modeloutput/' + secondmodel +
                                       '.csv',
                                       index_col='volid')

            for docid in secondonself.index:
                date = secondonself.loc[docid, 'dateused']
                realclass = int(secondonself.loc[docid, 'realclass'])
                selfprediction = secondonself.loc[docid, 'logistic']

                if date < 1930 or date > 1976:
                    continue

                if realclass == 0:
                    continue

                # Only lookup failures are handled here; anything else
                # (including KeyboardInterrupt) is allowed to propagate
                # instead of being swallowed by a bare except.
                try:
                    predictionfrompast = firstonall.loc[docid, firstmodel]
                    # this gets the prediction about this book made by a model in the past
                except (KeyError, TypeError):
                    try:
                        # Retry with the string form of the id, in case the
                        # two tables store their index labels differently.
                        predictionfrompast = firstonall.loc[str(docid),
                                                            firstmodel]
                    except (KeyError, TypeError):
                        print(docid, firstmodel, secondonself.loc[docid,
                                                                  'title'])
                        continue

                diff = predictionfrompast - selfprediction

                differences.setdefault(docid, []).append(diff)
                dates[docid] = date
                authors[docid] = secondonself.loc[docid, 'author']
                titles[docid] = secondonself.loc[docid, 'title']
                genders[docid] = secondonself.loc[docid, 'gender']

    # newline='' is what the csv module requires for files handed to a
    # writer; without it, platforms that translate newlines emit '\r\r\n'.
    with open('../plotdata/sfsurprises.tsv', mode='w', encoding='utf-8',
              newline='') as f:
        writer = csv.DictWriter(
            f,
            delimiter='\t',
            fieldnames=['docid', 'date', 'gender', 'diff', 'author', 'title'])
        writer.writeheader()
        for docid, date in dates.items():
            diffs = differences[docid]
            writer.writerow({
                'docid': docid,
                'date': date,
                'diff': sum(diffs) / len(diffs),  # mean difference per document
                'author': authors[docid],
                'title': titles[docid],
                'gender': genders[docid],
            })