return None return w def add_words_from_title(words, title, stop_words, lem): for word in title.split(): w = word_filter(word, stop_words, lem) if w: words.append(w) if __name__ == "__main__": args = interface() faculty = load.load_assistant_profs(open(args.input_file)) load.load_all_publications(faculty, args.dblp_dir, args.gs_dir) lem = WordNetLemmatizer() stop_words = stopwords.words('english') if args.custom_stops: custom_words = [ word_filter(w.strip(), [], lem) for w in open(args.custom_stops, 'rU') ] stop_words += custom_words for f in faculty: tag = None if 'dblp_pubs' in f: tag = f['dblp']
return None return w def add_words_from_title(words, title, stop_words, lem): for word in title.split(): w = word_filter(word, stop_words, lem) if w: words.append(w) if __name__=="__main__": args = interface() inst = institution_parser.parse_institution_records(open(args.inst_file)) faculty = load.load_assistant_profs(open(args.fac_file), inst) load.load_all_publications(faculty, args.dblp_dir, args.gs_dir) lem = WordNetLemmatizer() stop_words = stopwords.words('english') if args.custom_stops: custom_words = [word_filter(w.strip(), [], lem) for w in open(args.custom_stops, 'rU')] stop_words += custom_words written = 0 for f in faculty: tag = None words = [] if 'dblp_pubs' in f: tag = f['dblp'] for pub in f['dblp_pubs']:
name = line.split(':', 1)[-1].strip() if name == next_name: output.write('# dblp_n : %d\n' % num_papers[next_ind]) output.write('# dblp_n_2011 : %d\n' % num_papers_2011[next_ind]) next_ind += 1 if next_ind < max_ind: next_name = names[next_ind] else: done = True if not done: print 'WARNING: failed to link all z-scores!' output.close() if __name__ == "__main__": args = interface() inst = institution_parser.parse_institution_records(open(args.inst_file)) faculty = load_assistant_profs(open(args.faculty_file, 'rU'), inst) load.load_all_publications(faculty, args.dblp_dir, gs_dir=None) dists, tots = get_paper_counts_by_topic(faculty) means, stds = get_topic_means_stds(dists, tots) print means print stds set_zscores(faculty, means, stds) #add_zscores_to_file(faculty, args.faculty_file, args.output_file) add_counts_to_file(faculty, args.faculty_file, args.output_file)
name = line.split(':', 1)[-1].strip() if name == next_name: output.write('# dblp_n : %d\n' % num_papers[next_ind]) output.write('# dblp_n_2011 : %d\n' % num_papers_2011[next_ind]) next_ind += 1 if next_ind < max_ind: next_name = names[next_ind] else: done = True if not done: print 'WARNING: failed to link all z-scores!' output.close() if __name__=="__main__": args = interface() inst = institution_parser.parse_institution_records(open(args.inst_file)) faculty = load_assistant_profs(open(args.faculty_file, 'rU'), inst) load.load_all_publications(faculty, args.dblp_dir, gs_dir=None) dists, tots = get_paper_counts_by_topic(faculty) means, stds = get_topic_means_stds(dists, tots) print means print stds set_zscores(faculty, means, stds) #add_zscores_to_file(faculty, args.faculty_file, args.output_file) add_counts_to_file(faculty, args.faculty_file, args.output_file)