return None
    return w


def add_words_from_title(words, title, stop_words, lem):
    """Append the usable words of `title` to `words`, in place.

    The title is split on whitespace and every token is handed to
    `word_filter` together with `stop_words` and `lem`. Tokens for which the
    filter returns a falsy value are dropped; everything else is appended to
    `words` in title order. Nothing is returned.
    """
    for token in title.split():
        kept = word_filter(token, stop_words, lem)
        if not kept:
            # The filter rejected this token; move on to the next one.
            continue
        words.append(kept)


# Script entry point: loads faculty records and their publications, builds the
# stop-word list, then walks over every faculty record.
if __name__ == "__main__":
    # `interface()` is defined elsewhere; the attributes read from its result
    # here are input_file, dblp_dir, gs_dir and custom_stops.
    args = interface()

    # NOTE(review): the handle opened here is never explicitly closed.
    faculty = load.load_assistant_profs(open(args.input_file))
    # Return value is discarded -- presumably the publications are attached to
    # the faculty records in place; confirm against `load`.
    load.load_all_publications(faculty, args.dblp_dir, args.gs_dir)

    lem = WordNetLemmatizer()
    stop_words = stopwords.words('english')
    if args.custom_stops:
        # One custom stop word per line. Each is stripped and run through
        # word_filter with an empty stop list (so only the filter's other
        # rules apply) before being added to the stop-word list.
        # NOTE(review): if word_filter can return None for a line, that None
        # ends up in stop_words -- confirm this is harmless.
        # NOTE(review): 'rU' is the Python 2 universal-newline mode, and the
        # handle is never explicitly closed.
        custom_words = [
            word_filter(w.strip(), [], lem)
            for w in open(args.custom_stops, 'rU')
        ]
        stop_words += custom_words

    for f in faculty:
        # Stays None unless the record carries a 'dblp_pubs' entry.
        tag = None

        if 'dblp_pubs' in f:
            tag = f['dblp']
Ejemplo n.º 2
0
        return None
    return w


def add_words_from_title(words, title, stop_words, lem):
    """Append the usable words of `title` to `words`, in place.

    The title is split on whitespace and every token is handed to
    `word_filter` together with `stop_words` and `lem`. Tokens for which the
    filter returns a falsy value are dropped; everything else is appended to
    `words` in title order. Nothing is returned.
    """
    for token in title.split():
        kept = word_filter(token, stop_words, lem)
        if not kept:
            # The filter rejected this token; move on to the next one.
            continue
        words.append(kept)


if __name__=="__main__":
    args = interface()
    inst = institution_parser.parse_institution_records(open(args.inst_file))
    faculty = load.load_assistant_profs(open(args.fac_file), inst)
    load.load_all_publications(faculty, args.dblp_dir, args.gs_dir)

    lem = WordNetLemmatizer()
    stop_words = stopwords.words('english')
    if args.custom_stops:
        custom_words = [word_filter(w.strip(), [], lem) for w in open(args.custom_stops, 'rU')]
        stop_words += custom_words

    written = 0
    for f in faculty:
        tag = None
        words = []
    
        if 'dblp_pubs' in f:
            tag = f['dblp']
            for pub in f['dblp_pubs']:
Ejemplo n.º 3
0
            name = line.split(':', 1)[-1].strip()
            if name == next_name:
                output.write('# dblp_n      : %d\n' % num_papers[next_ind])
                output.write('# dblp_n_2011 : %d\n' %
                             num_papers_2011[next_ind])
                next_ind += 1
                if next_ind < max_ind:
                    next_name = names[next_ind]
                else:
                    done = True

    if not done:
        print 'WARNING: failed to link all z-scores!'

    output.close()


# Script entry point: loads institutions, faculty and DBLP publications,
# computes per-topic statistics, prints them, and writes paper counts to the
# output file.
if __name__ == "__main__":
    # `interface()` is defined elsewhere; the attributes read from its result
    # here are inst_file, faculty_file, dblp_dir and output_file.
    args = interface()

    # The parsed institution records are handed to the faculty loader.
    # NOTE(review): neither file handle opened below is explicitly closed, and
    # 'rU' is the Python 2 universal-newline mode.
    inst = institution_parser.parse_institution_records(open(args.inst_file))
    faculty = load_assistant_profs(open(args.faculty_file, 'rU'), inst)
    # gs_dir is passed as None here, so only the DBLP directory is supplied.
    load.load_all_publications(faculty, args.dblp_dir, gs_dir=None)
    dists, tots = get_paper_counts_by_topic(faculty)
    means, stds = get_topic_means_stds(dists, tots)
    # Python 2 print statements: dump the computed statistics to stdout.
    print means
    print stds
    # Return value is discarded -- presumably z-scores are stored on the
    # faculty records in place; confirm against set_zscores.
    set_zscores(faculty, means, stds)
    # The z-score writer below is disabled; only the counts writer runs.
    #add_zscores_to_file(faculty, args.faculty_file, args.output_file)
    add_counts_to_file(faculty, args.faculty_file, args.output_file)
Ejemplo n.º 4
0
            name = line.split(':', 1)[-1].strip()
            if name == next_name:
                output.write('# dblp_n      : %d\n' % num_papers[next_ind])
                output.write('# dblp_n_2011 : %d\n' % num_papers_2011[next_ind])
                next_ind += 1
                if next_ind < max_ind:
                    next_name = names[next_ind]
                else:
                    done = True

    if not done:
        print 'WARNING: failed to link all z-scores!'

    output.close()
        

# Script entry point: loads institutions, faculty and DBLP publications,
# computes per-topic statistics, prints them, and writes paper counts to the
# output file.
if __name__=="__main__":
    # `interface()` is defined elsewhere; the attributes read from its result
    # here are inst_file, faculty_file, dblp_dir and output_file.
    args = interface()
    
    # The parsed institution records are handed to the faculty loader.
    # NOTE(review): neither file handle opened below is explicitly closed, and
    # 'rU' is the Python 2 universal-newline mode.
    inst = institution_parser.parse_institution_records(open(args.inst_file))
    faculty = load_assistant_profs(open(args.faculty_file, 'rU'), inst)
    # gs_dir is passed as None here, so only the DBLP directory is supplied.
    load.load_all_publications(faculty, args.dblp_dir, gs_dir=None)
    dists, tots = get_paper_counts_by_topic(faculty)
    means, stds = get_topic_means_stds(dists, tots)
    # Python 2 print statements: dump the computed statistics to stdout.
    print means
    print stds
    # Return value is discarded -- presumably z-scores are stored on the
    # faculty records in place; confirm against set_zscores.
    set_zscores(faculty, means, stds)
    # The z-score writer below is disabled; only the counts writer runs.
    #add_zscores_to_file(faculty, args.faculty_file, args.output_file)
    add_counts_to_file(faculty, args.faculty_file, args.output_file)