def make_map(query, only_terms=False, file_format='svg', include_svg_dimensions=False,
             starting_year=None, ending_year=None, sample_size=None,
             evaluation_output_path=None, term_type=TermExtraction.Phrases,
             data_dump_path=None, n_layers=0, graph_attrs=None, **kwargs):
    # filter the document set by date range and sample size, then extract terms
    documents = filter_query(query, starting_year=starting_year,
                             ending_year=ending_year, sample_size=sample_size)
    extracted_terms = extract_terms(documents, term_type)
    map_dict, graph_terms, phrase_frequencies, similarities, scored_phrases = map_representation(
        extracted_terms, data_dump_path=data_dump_path, **kwargs)
    print type(similarities)  # debug output
    # map_string will be a graphviz-processable string
    print 'here'  # debug output
    map_string = write_dot.output_pairs_dict(
        map_dict, False, true_scaling=True, phrase_frequencies=phrase_frequencies,
        similarities=similarities, phrase_scores=scored_phrases,
        n_layers=n_layers, graph_attrs=graph_attrs)
    print map_string  # debug output
    if evaluation_output_path:
        import evaluation
        evaluation.plot_phrase_frequencies(phrase_frequencies, evaluation_output_path)
        evaluation.plot_edge_weight_distribution(map_dict, evaluation_output_path)
    if only_terms:
        return '\n'.join(sorted([' '.join(tpl) for tpl in graph_terms]))
    if file_format == 'raw':
        return map_string
    else:
        map_ = call_graphviz(map_string, file_format)
        if file_format == 'svg' and not include_svg_dimensions:
            return strip_dimensions(map_)
        else:
            return map_
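# Minimal usage sketch (an assumption, not part of the module): `query` is whatever
# object filter_query accepts, and the module-level imports (write_dot, call_graviz
# helpers, TermExtraction, etc.) are already in place.
#
#     svg = make_map(query, starting_year=2000, ending_year=2010,
#                    sample_size=500, file_format='svg')
#     dot_source = make_map(query, file_format='raw')
#     term_list = make_map(query, only_terms=True)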
def make_basemap(basemap):
    try:
        set_status('getting document list', model=basemap)
        with ManagedSession() as session:
            filtered_query = create_query_for_model(session, basemap, dirty=False)
            extracted_terms = extract_terms(filtered_query, basemap.term_type)
            if not extracted_terms:
                raise Exception('No documents found matching query!')
            map_dict, graph_terms, phrase_frequencies, unnormed_dict, phrase_scores = map_representation(
                extracted_terms,
                ranking_algorithm=basemap.ranking_algorithm,
                similarity_algorithm=basemap.similarity_algorithm,
                filtering_algorithm=basemap.filtering_algorithm,
                number_of_terms=basemap.number_of_terms,
                model=basemap)
            # map_string will be a graphviz-processable string
            # map_string = write_dot.output_pairs_dict(map_dict, True, phrase_frequencies=phrase_frequencies, true_scaling=True).decode('ascii', 'ignore')
            map_string = write_dot.output_pairs_dict(
                map_dict, True, phrase_frequencies=phrase_frequencies, true_scaling=True,
                similarities=unnormed_dict, phrase_scores=phrase_scores).decode('ascii', 'ignore')
            # save the dot representation to the database
            basemap.dot_rep = map_string
            # basemap.phrase_frequencies = json.dumps(jsonize_phrase_dict(phrase_frequencies), indent=4).decode('ascii', 'ignore')
            # get phrases as a list of lists of strings (one list of words per term)
            basemap.phrases_in_map = json.dumps(jsonize_phrase_set(graph_terms, None)).decode('ascii', 'ignore')
            basemap.save()
            # render the map to SVG and store the markup along with its dimensions
            svg_str, width, height = strip_dimensions(
                call_graphviz(map_string, file_format='svg', model=basemap))
            basemap.svg_rep = svg_str
            basemap.width = width
            basemap.height = height
            basemap.finished = True
            basemap.save()
            set_status('basemap complete', model=basemap)
            print 'basemap complete'  # debug output
            return map_dict, graph_terms
    except ZeroDivisionError as e:
        set_status('Error: too few documents to produce a map. Try a broader search', model=basemap)
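# Hypothetical usage sketch (assumption: `basemap` is a saved model instance whose
# term_type, ranking/similarity/filtering algorithms, and number_of_terms are set):
#
#     result = make_basemap(basemap)
#     if result is not None:
#         map_dict, graph_terms = result
#     else:
#         # too few documents; the error status was written to the model
#         pass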