def test_favours_shorter_matches(monkeypatch):
    """When tie between matches, favour shorter rather than first to occur"""
    def fake_extract_bests(*args, **kwargs):
        # Two candidates with identical scores; the shorter one should win.
        return [('some long thing', 100), ('thing', 100)]

    def fake_index(language):
        return {
            'thing': {
                'path': 'path/for/thing',
                'junk': "anything else can be in here; it doesn't matter",
            }
        }

    monkeypatch.setattr(process, 'extractBests', fake_extract_bests)
    monkeypatch.setattr(docset, 'index', fake_index)

    docs_entry = identify('any-language', 'thing')

    expected_doc_path = (
        PATH.joinpath('any-language').joinpath('path/for/thing.html'))
    assert docs_entry.path == expected_doc_path
    assert docs_entry.entry_id is None
def index():
    """Flask view: on POST, validate and save the uploaded file, run the
    threshold/identification pipeline on it, and render the results.

    On GET (or when the upload is rejected) the bare home page is rendered.
    Invalid uploads flash a message and redirect back to the form.
    """
    if request.method == 'POST':
        print("Post method")
        # Multipart form did not contain a 'file' part at all.
        if 'file' not in request.files:
            print("No file")
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # Browsers submit an empty filename when no file was chosen.
        if file.filename == '':
            print("No file is selected")
            flash('No file is selected')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            # Sanitise the client-supplied name before using it on disk.
            filename = secure_filename(file.filename)
            print(os.path.join(app.config['UPLOAD_FOLDER']))
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            print(filename + " is saved and done !!")
            # Pre-process the saved image, then extract the identified strings.
            # NOTE(review): identify() is called with no argument here —
            # presumably it reads the file saved above; confirm.
            apply_threshold(filename)
            dig_string = identify()
            print(dig_string)
            return render_template('home.html', strings=dig_string,
                                   img=file.filename)
    return render_template('home.html')
def test_basic_identification_4():
    # Many matches for 'Array' for JavaScript, but without further
    # distinguishing information makes sense to have main 'Array' entry as the
    # top match.
    entry = identify('javascript', 'Array')
    expected = PATH.joinpath('javascript/global_objects/array.html')
    assert entry.path == expected
    assert entry.entry_id is None
def evaluate():
    """Evaluate location identification against the SpatialML corpus.

    Runs the pipeline over every raw SpatialML file, compares the identified
    locations with the gold-standard locations from the simplified corpus, and
    prints averaged precision/recall plus F-measures.
    """
    # for each original file in the SpatialML corpus
    all_files = os.listdir(SPATIALML_RAW_DIR)

    # Running totals; indices 0..3 are read back below as
    # [recog precision, disambig precision, recog recall, disambig recall] —
    # TODO confirm against evaluate_identified_locs_against_corpus_locs().
    totals = [0, 0, 0, 0]

    # BUG FIX: the original removed skipped files from `all_files` while
    # iterating over it, which silently skips the file after each removal and
    # corrupts the averaging denominator. Track evaluated files separately.
    evaluated_files = []
    for spatialml_file in all_files:
        print("Evaluating {}...".format(spatialml_file))

        # process file to obtain list of identified locations
        text = read_from_file(SPATIALML_RAW_DIR + spatialml_file)
        corenlp_tagged_text = corenlp_tag_text(text)
        identified_locations = identify(corenlp_tagged_text)

        # extract the "gold standard" locations from corresponding file in simplified corpus
        spatialml = read_from_file(SPATIALML_SIMPLE_DIR + spatialml_file)
        corpus_locations = get_locations_from_spatialml(spatialml)

        # TODO handle better - currently just skip where no matches either way
        if len(identified_locations) > 0 and len(corpus_locations) > 0:
            results = evaluate_identified_locs_against_corpus_locs(
                identified_locations, corpus_locations)
            for index in range(len(results)):
                totals[index] += results[index]
            evaluated_files.append(spatialml_file)

    print(totals)

    # Average only over the files that were actually evaluated.
    num_tests = len(evaluated_files)
    if num_tests == 0:
        # Guard against ZeroDivisionError when every file was skipped.
        print("No files could be evaluated.")
        return
    averages = [value / num_tests for value in totals]

    av_recog_prec = averages[0]
    av_recog_recall = averages[2]
    print(av_recog_prec, av_recog_recall)
    print("Recog F-measure ", harmonic_mean(av_recog_prec, av_recog_recall))

    av_disambig_prec = averages[1]
    av_disambig_recall = averages[3]
    print("Disambig F-measure ", harmonic_mean(av_disambig_prec, av_disambig_recall))
def identify_spatialml_raw_locations(disambiguation_function, pickled_dir):
    """ Main logic of script - for all raw SpatialML files run pipeline on with given disambiguation function and
        pickle resulting list of locations to a corresponding file in pickled_dir.
    """
    print("Running pipeline on raw SpatialML files using disambiguation function {}...\n"
          .format(disambiguation_function))

    for filename in os.listdir(config.SPATIALML_RAW_DIR):
        print("Processing {}...".format(filename))

        # Push the file's text through the pipeline to get IdentifiedLocations.
        raw_text = utilities.read_from_file(config.SPATIALML_RAW_DIR + filename)
        tagged = corenlp_interface.corenlp_tag_text(raw_text)
        # TODO deal with using different disambig methods better
        locs = identification.identify(tagged, disambiguation_function)

        # Serialise the locations to the matching file in pickled_dir.
        destination = pickled_dir + filename
        with open(destination, 'wb') as pickle_file:
            pickle.dump(locs, pickle_file)

        print("\n\n")
def identify_card():
    """Identify a card from the image URI and set code posted in the request form."""
    form = request.forms
    image_uri = form.get('image')
    set_code = form.get('setcode')
    return identification.identify(image_uri, set_code)
def test_basic_identification_2():
    entry = identify('javascript', '!=')
    expected_path = PATH.joinpath(
        'javascript/operators/comparison_operators.html')
    assert entry.path == expected_path
    assert entry.entry_id == 'Inequality'
# NOTE(review): tail of a tutorial script — `struct`, `filled_data` and
# `binary_data` are created earlier, outside this excerpt; confirm their
# shapes against the identification module's expectations.

# Fill a disk of radius 8.5 centred in the (presumably 16x16) structuring
# element grid.
rr, cc = draw.disk((7.5, 7.5), radius=8.5)
struct[rr, cc] = 1
# https://stackoverflow.com/a/41495033

# if we need to find an appropriate precipitation threshold, we can create a
# histogram from the data to aid in finding a precipitation threshold
viz.histogram(filled_data, 3, (0, 2))

# the algorithms take 3d (Time x Rows x Cols) arrays, so if we have a 2d
# array, we need to reshape it to include a phony third dimension
if binary_data.ndim == 2:
    binary_data = binary_data.reshape(1, binary_data.shape[0], binary_data.shape[1])

# to compute the identification algorithm, we simply supply the data and our
# morphological structure
labeled_maps = idf.identify(binary_data, struct)

# to see the results of our work, we can use show_save_storms, but first
# let's try a different colormap
cmap = plt.get_cmap('hsv')
viz.show_save_storms(labeled_maps, cmap, 'Identified Storms 1996', 1, show_save='show')

# to save the result, we can use np save
np.save('labeled_maps.npy', labeled_maps)
# and to load it, we can use np load
# labeled_maps = np.load('labeled_maps.npy', allow_pickle=True)
#Extract plain text from json source plain_source_text = json_to_text.extract_data(data, args.limit) #returns a string if args.postagging.lower() == 'on': #Annotate plain source text with POS tags annotated_data = sparv_annotation.annotate(plain_source_text) #returns a list of sentences #Anonymize the POS annotated data output_data, labeled_words = sparv_identification.identify(plain_source_text, annotated_data) add_wordlists_to_genre(genre, labeled_words) elif args.postagging.lower() == 'off': #Anonymize data without POS annotation output_data = identification.identify(plain_source_text) else: print("Pos arguments must be 'on' or 'off'") #Count occurrences per label labels = count_occurrences_per_label(output_data, genre) #If there are more than one genre assigned to an essay, add the labels for all genres if ',' in genre: sev_genres = genre.split(', ') for g in sev_genres: add_labels_to_genre(g, labels) else: add_labels_to_genre(genre, labels) else:
"""Anonymization entry point: read a text file, run identification over its
contents, and dump the result as JSON to the requested output path."""
# Modules — stdlib first, then local.
import argparse
import json

import anonymize_personal_info
import identification

parser = argparse.ArgumentParser(description='Program takes an input and output text file together with output formats')
parser.add_argument('--input', '-i', type=str, required=True)
parser.add_argument('--output', '-o', type=str, required=True)
args = parser.parse_args()

if __name__ == '__main__':
    print("Running main file")

    # Read input file, only text file
    with open(args.input) as file:
        data = file.read()

    output_data = identification.identify(data)

    # Save the output to the path in args.output.
    # (args.output is required=True above, so this check is always true;
    # kept for safety.)
    if args.output:
        with open(args.output, 'w') as file:
            json.dump(output_data, file)
def _perform_search(doc_set, search_term):
    """Resolve the doc set, identify the best entry for the term, and parse
    the entry's document file."""
    resolved_set = docset.from_identifier(doc_set)
    entry = identification.identify(resolved_set, search_term)
    with entry.path.open() as doc_file:
        return doc_parser.parse(doc_file, entry)
def identify_signs(self):
    """Run sign identification over the loaded pictures and show the result.

    Rebuilds the "edit_folder" working directory (after asking the user
    whether to delete a pre-existing one), runs identify() over
    self.pictures_data, caches the returned result/count values on self,
    and refreshes the displayed result.
    """
    # Working directory: two path levels above the first picture's file path.
    edit_folder_path = self.pictures_data[0].data["File Path"].rpartition(
        "/")[0].rpartition("/")[0] + "/edit_folder"
    if os.path.isdir(edit_folder_path):
        result = messagebox.askquestion(
            "Delete", "There is a Edit-Image-Folder! Delete it?",
            icon='warning')
        if result == 'yes':
            # NOTE(review): recomputes the same path instead of reusing
            # edit_folder_path.
            shutil.rmtree(self.pictures_data[0].data["File Path"].
                          rpartition("/")[0].rpartition("/")[0] + "/edit_folder")
    # NOTE(review): if the folder existed and the user answered 'no',
    # os.mkdir raises FileExistsError here — confirm this is intended.
    os.mkdir(edit_folder_path)
    ##### Identificate the Signs #####
    # identify() returns a 17-element tuple: the updated picture data plus
    # per-category results/counts (A/V/S/G with *_B and *_FP variants) —
    # exact semantics of each slot live in identify(); verify there.
    self.pictures_data, self.result_global_A, self.result_global_V, self.anzahl_A, self.anzahl_V, self.anzahl_A_B, self.anzahl_V_B, self.anzahl_A_FP, self.anzahl_V_FP, self.result_global_S, self.result_global_G, self.anzahl_S, self.anzahl_G, self.anzahl_S_B, self.anzahl_G_B, self.anzahl_S_FP, self.anzahl_G_FP = identify(
        self.pictures_data, edit_folder_path)
    # Refresh the UI with the currently displayed picture's result (1-based).
    self.display_result(self.pictures_data[self.currentDisplayedResult - 1])
'''
main.py: runs the back end stages: identification, quantification and
nutritional information lookup.
Values are returned to the server by being printed. All identified foods
followed by the total fat and the number of foods identified are printed.
The number of foods identified is used to return the foods to the Android
application.
'''
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required = True, help = "Path to the image")
args = vars(ap.parse_args())

img = cv2.imread(args["image"])

fatPerGram = []
mass = []

result = identify(img)
if not result:
    # If there are no foods identified
    print("No Foods were identified in this image")
    print(0)
    print(0)
    # NOTE(review): execution deliberately(?) continues with the empty
    # result — quantify() below presumably returns an empty list then;
    # confirm, otherwise an early exit belongs here.
mass = quantify(result, img)

# Each foods corresponding fat per gram is added to the list
for m in mass:
    fatPerGram.append(nutrients(m[0]['label']))

# Accumulate total fat across all quantified foods, printing each food's
# label and mass as we go (the server parses this printed output).
i=0
fat = 0
for f in fatPerGram:
    print(mass[i][0]["label"],": Mass =",round(mass[i][1],2),"g")
    fat = fat+f*mass[i][1]
    i=i+1
print(fat)
def test_basic_identification_3():
    entry = identify('http', '404')
    assert entry.path == PATH.joinpath('http/status/404.html')
    assert entry.entry_id is None
def test_favours_exact_partial_match():
    """Given `method` give `method (attribute)` rather than e.g. `meta`"""
    entry = identify('html', 'method')
    expected_path = PATH.joinpath('html/attributes.html')
    assert entry.path == expected_path
    assert entry.entry_id == 'method-attribute'
def map_locations(url=None, file=None, display_map=False):
    """ Main logic of program, perform entire pipeline on the text indicated by the command line arguments given,
        writing each stage of the pipeline to files in the results directory.

    Args:
        url: address of an article to fetch via the Readability API.
        file: path of a local file to read the article text from.
            Exactly one of url/file should be given.
        display_map: when True, open the generated map in a browser tab.
    """
    # exit if neither url nor file given
    if url is None and file is None:
        print("A url or file must be given to read content to process from, see help (-h or --help option) for more "
              "information.")
        exit(1)

    # starting message
    loc = url if file is None else file
    print("Starting map_locations for {}...".format(loc))

    # obtain the content to process
    if file is not None:
        # read content from file
        print("Reading article from file...")
        title = file
        content = utilities.read_from_file(file)
    elif url is not None:
        # make request to Readability API for url
        print("Obtaining article from url...")
        readability_response = readability_interface.readability_request(url)
        title = readability_response['title']
        html_content = readability_response['content']
        content = BeautifulSoup(html_content).get_text()

    # form results directory for article
    print("Forming results directory for article...")
    results_dir = make_results_dir(title)

    # store content of article
    print("Writing article content to file...")
    content_file = results_dir + '01_content.txt'
    utilities.write_to_file(content_file, content)

    # tag file using Stanford CoreNLP server
    print("Tagging named entities in article...")
    try:
        corenlp_tagged_text = corenlp_interface.corenlp_tag_text(content)
    except ConnectionRefusedError:
        # BUG FIX: the original called ex.with_traceback() with no argument
        # (itself a TypeError) and then fell through to use the undefined
        # corenlp_tagged_text. Report the likely cause and stop instead.
        import traceback
        print("Stanford CoreNLP server must be run to tag named entities! "
              "(settings in config.py)")
        traceback.print_exc()
        exit(1)

    # store tagged article
    print("Writing tagged article to file...")
    corenlp_tagged_file = results_dir + '02_corenlp_tagged.xml'
    utilities.write_to_file(corenlp_tagged_file, corenlp_tagged_text)

    # disambiguate identified locations to find most likely candidate (candidates written to files in disambiguate())
    print("Disambiguating identified locations...")
    identified_locations = identification.identify(corenlp_tagged_text, results_dir)

    # form kml for identified locations
    print("Creating kml for article locations...")
    kml = kml_generation.create_kml(identified_locations)

    print("Writing kml to file...")
    relative_kml_file = '04_kml.kml'
    kml_file = results_dir + relative_kml_file
    utilities.write_to_file(kml_file, kml)

    print("Creating html files for map...")
    # map html file
    with open(config.CONTEXT_DIR + config.MAP_VIEW_TEMPLATE) as template_file:
        template = string.Template(template_file.read())
        html = template.substitute(kml_file=relative_kml_file, title=title)
        map_html_file = results_dir + '05_map_view.html'
        utilities.write_to_file(map_html_file, html)

    # article html file
    with open(config.CONTEXT_DIR + config.ARTICLE_TEMPLATE) as template_file:
        template = string.Template(template_file.read())

        # Form article content html, adding bold tags around identified locations.
        # find positions of all ided locs and add bold tags in reverse order so positions don't shift
        content_html_list = list(content)
        positions = {}
        for ided_loc in identified_locations:
            positions[ided_loc.start] = ided_loc.stop
        start_positions = reversed(sorted(positions.keys()))
        for start_pos in start_positions:
            stop_pos = positions[start_pos]
            # NOTE(review): the -1 offsets place the tags one character before
            # the recorded start/stop — confirm the off-by-one is intended.
            content_html_list.insert(stop_pos-1, '</b>')
            content_html_list.insert(start_pos-1, '<b>')

        # replace newlines with paragraphs
        for index, el in enumerate(content_html_list):
            if el == '\n':
                content_html_list[index] = '<p>'
        content_html = ''.join(content_html_list)

        # create and save the html
        html = template.substitute(article_title=title, article_content=content_html)
        article_html_file = results_dir + '06_identified_locs.html'
        utilities.write_to_file(article_html_file, html)

    if display_map:
        print("Opening map...")
        webbrowser.open_new_tab(map_html_file)

    print("Map: file://" + map_html_file)
    print("map_locations successfully completed for {}.\n".format(loc))
# Tutorial script: identify storms in 1996 precipitation data and save a
# visualization of the labeled result.
import identification as idf
import matplotlib.pyplot as plt
import numpy as np
import visualization as viz
from skimage import draw
import tracking as tr

# load some initial precip data
precip_data = np.load('tutorial_files/precip_1996.npy', allow_pickle=True)

# set a precip threshold and narrow your region of interest
# (values below THRESHOLD are zeroed out)
THRESHOLD = 0.6
trimmed_data = np.where(precip_data < THRESHOLD, 0, precip_data)

# create a structural set: a disk of radius 8.5 filled into a 16x16 grid
struct = np.zeros((16, 16))
rr, cc = draw.disk((7.5, 7.5), radius=8.5)
struct[rr, cc] = 1

# identify your storms
labeled_maps = idf.identify(trimmed_data, struct)

# visualize your data (show_save='save' writes the figure instead of showing it)
cmap = plt.get_cmap('hsv')
viz.show_save_storms(labeled_maps, cmap, 'Identified Storms 1996', 1, show_save='save')
def test_basic_identification_1():
    entry = identify('html', 'input')
    expected_path = PATH.joinpath('html/element/input.html')
    assert entry.path == expected_path
    assert entry.entry_id is None