Example #1
def test_favours_shorter_matches(monkeypatch):
    """When tie between matches, favour shorter rather than first to occur"""
    monkeypatch.setattr(
        process, 'extractBests',
        lambda *args, **kwargs: [
            ('some long thing', 100),
            ('thing', 100)
        ]
    )
    monkeypatch.setattr(
        docset, 'index',
        lambda l: {
            'thing': {
                'path': 'path/for/thing',
                'junk': "anything else can be in here; it doesn't matter",
            }
        }
    )

    docs_entry = identify('any-language', 'thing')

    expected_doc_path = \
        PATH.joinpath('any-language').joinpath('path/for/thing.html')
    assert docs_entry.path == expected_doc_path

    assert docs_entry.entry_id is None
Example #2
def index():
    if request.method == 'POST':
        print("Post method")

        if 'file' not in request.files:
            print("No file")
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']

        if file.filename == '':
            print("No file is selected")
            flash('No file is selected')
            return redirect(request.url)

        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            print(os.path.join(app.config['UPLOAD_FOLDER']))
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))

            print(filename + " is saved and done !!")
            apply_threshold(filename)
            dig_string = identify()
            print(dig_string)
            return render_template('home.html', strings=dig_string, img=file.filename)

    return render_template('home.html')
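
The handler above depends on an allowed_file helper that the excerpt does not show. A minimal sketch, assuming a hypothetical ALLOWED_EXTENSIONS set of permitted file suffixes:

ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}  # assumed set; not part of the original excerpt

def allowed_file(filename):
    # accept only filenames that have an extension in the allowed set
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS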
Example #3
def test_basic_identification_4():
    # There are many matches for 'Array' in JavaScript, but without further
    # distinguishing information it makes sense to have the main 'Array'
    # entry as the top match.
    docs_entry = identify('javascript', 'Array')
    assert docs_entry.path == \
        PATH.joinpath('javascript/global_objects/array.html')
    assert docs_entry.entry_id is None
Example #4
def evaluate():
    # for each original file in the SpatialML corpus
    all_files = os.listdir(SPATIALML_RAW_DIR)

    totals = [0, 0, 0, 0]
    num_tests = 0

    for spatialml_file in all_files:

        print("Evaluating {}...".format(spatialml_file))

        # process file to obtain list of identified locations
        text = read_from_file(SPATIALML_RAW_DIR + spatialml_file)
        corenlp_tagged_text = corenlp_tag_text(text)
        identified_locations = identify(corenlp_tagged_text)

        # extract the "gold standard" locations from corresponding file in simplified corpus
        spatialml = read_from_file(SPATIALML_SIMPLE_DIR + spatialml_file)
        corpus_locations = get_locations_from_spatialml(spatialml)

        # TODO handle better - currently just skip where no matches either way
        if len(identified_locations) > 0 and len(corpus_locations) > 0:
            results = evaluate_identified_locs_against_corpus_locs(identified_locations, corpus_locations)
            for index in range(len(results)):
                totals[index] += results[index]
            # count only the files that actually produced matches; the original
            # removed entries from all_files while iterating over it, which
            # silently skips the following file
            num_tests += 1

        print(totals)

    averages = []
    for value in totals:
        averages.append(value / num_tests)

    av_recog_prec = averages[0]
    av_recog_recall = averages[2]
    print(av_recog_prec, av_recog_recall)
    print("Recog F-measure ", harmonic_mean(av_recog_prec, av_recog_recall))

    av_disambig_prec = averages[1]
    av_disambig_recall = averages[3]
    print("Disambig F-measure ", harmonic_mean(av_disambig_prec, av_disambig_recall))
Example #5
def identify_spatialml_raw_locations(disambiguation_function, pickled_dir):
    """ Main logic of script - for all raw SpatialML files run pipeline on with given disambiguation function and
        pickle resulting list of locations to a corresponding file in pickled_dir.
    """

    print("Running pipeline on raw SpatialML files using disambiguation function {}...\n"
          .format(disambiguation_function))

    for spatialml_file in os.listdir(config.SPATIALML_RAW_DIR):

        print("Processing {}...".format(spatialml_file))

        # run text in file through pipeline to get list of IdentifiedLocations
        text = utilities.read_from_file(config.SPATIALML_RAW_DIR + spatialml_file)
        corenlp_tagged_text = corenlp_interface.corenlp_tag_text(text)
        locations = identification.identify(corenlp_tagged_text, disambiguation_function)

        # TODO deal with using different disambig methods better
        # pickle locations to corresponding file in corresponding dir
        with open(pickled_dir + spatialml_file, 'wb') as pickle_file:
            pickle.dump(locations, pickle_file)

    print("\n\n")
Example #6
def identify_card():
    # Read Image and Setcode from Request Form
    img_uri = request.forms.get('image')
    setcode = request.forms.get('setcode')
    card_dict = identification.identify(img_uri, setcode)
    return card_dict
Example #7
def test_basic_identification_2():
    docs_entry = identify('javascript', '!=')
    assert docs_entry.path == PATH.joinpath(
        'javascript/operators/comparison_operators.html')
    assert docs_entry.entry_id == 'Inequality'
Example #8
rr, cc = draw.disk((7.5, 7.5), radius=8.5)
struct[rr, cc] = 1
# https://stackoverflow.com/a/41495033

# if we need to find an appropriate precipitation threshold, we can create a
# histogram of the data to help choose one
viz.histogram(filled_data, 3, (0, 2))

# the algorithms take 3d (Time x Rows x Cols) arrays, so if we have a 2d array, we need to reshape it to include a phony
# third dimension
if binary_data.ndim == 2:
    binary_data = binary_data.reshape(1, binary_data.shape[0],
                                      binary_data.shape[1])

# to compute the identification algorithm, we simply supply the data and our morphological structure
labeled_maps = idf.identify(binary_data, struct)

# to see the results of our work, we can use show_save_storms, but first let's try a different colormap
cmap = plt.get_cmap('hsv')
viz.show_save_storms(labeled_maps,
                     cmap,
                     'Identified Storms 1996',
                     1,
                     show_save='show')

# to save the result, we can use np save
np.save('labeled_maps.npy', labeled_maps)

# and to load it, we can use np load
# labeled_maps = np.load('labeled_maps.npy', allow_pickle=True)
Example #9
            #Extract plain text from json source
            plain_source_text = json_to_text.extract_data(data, args.limit) #returns a string
            
            if args.postagging.lower() == 'on':
                #Annotate plain source text with POS tags
                annotated_data = sparv_annotation.annotate(plain_source_text) #returns a list of sentences
                                                        
                #Anonymize the POS annotated data
                output_data, labeled_words = sparv_identification.identify(plain_source_text, annotated_data)
                
                add_wordlists_to_genre(genre, labeled_words)
                

            elif args.postagging.lower() == 'off':
                #Anonymize data without POS annotation
                output_data = identification.identify(plain_source_text)
            else:
                print("Pos arguments must be 'on' or 'off'")
            
            #Count occurrences per label 
            labels = count_occurrences_per_label(output_data, genre)
                                                                                
            # If more than one genre is assigned to an essay, add the labels for all genres
            if ',' in genre:
                sev_genres = genre.split(', ')
                for g in sev_genres:
                    add_labels_to_genre(g, labels)
            else:
                add_labels_to_genre(genre, labels)
                
        else:
Example #10
# Modules
import argparse, json
import anonymize_personal_info
import identification

parser = argparse.ArgumentParser(description='Program takes an input text file and an output file path')
parser.add_argument('--input', '-i', type=str, required=True)
parser.add_argument('--output', '-o', type=str, required=True)
args = parser.parse_args()

if __name__ == '__main__':
    
    print("Running main file")    
    
    # Read input file, only text file
    with open(args.input) as file:
        data = file.read()
    
    output_data = identification.identify(data)
    
    # Save the output to the path in args.output
    if args.output:
        with open(args.output, 'w') as file:
            json.dump(output_data, file)
Example #11
def _perform_search(doc_set, search_term):
    doc_set = docset.from_identifier(doc_set)
    docs_entry = identification.identify(doc_set, search_term)
    with docs_entry.path.open() as f:
        return doc_parser.parse(f, docs_entry)
Example #12
    def identify_signs(self):

        edit_folder_path = self.pictures_data[0].data["File Path"].rpartition(
            "/")[0].rpartition("/")[0] + "/edit_folder"
        if os.path.isdir(edit_folder_path):
            result = messagebox.askquestion(
                "Delete",
                "There is an Edit-Image-Folder! Delete it?",
                icon='warning')
            if result == 'yes':
                shutil.rmtree(edit_folder_path)

        # only create the folder if it does not already exist; the original
        # called os.mkdir unconditionally, which raises FileExistsError when
        # the user answers 'no' above
        if not os.path.isdir(edit_folder_path):
            os.mkdir(edit_folder_path)

        ##### Identify the Signs #####
        self.pictures_data, self.result_global_A, self.result_global_V, self.anzahl_A, self.anzahl_V, self.anzahl_A_B, self.anzahl_V_B, self.anzahl_A_FP, self.anzahl_V_FP, self.result_global_S, self.result_global_G, self.anzahl_S, self.anzahl_G, self.anzahl_S_B, self.anzahl_G_B, self.anzahl_S_FP, self.anzahl_G_FP = identify(
            self.pictures_data, edit_folder_path)

        self.display_result(self.pictures_data[self.currentDisplayedResult -
                                               1])
Example #13
''' main.py:
This script runs the back-end classes: identification, quantification and nutritional information lookup.
Values are returned to the server by being printed. All identified foods, followed by the total fat and
the number of foods identified, are printed. The number of foods identified is used to return the foods
to the Android application.
'''

import argparse
import cv2

ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())

img = cv2.imread(args["image"])

fatPerGram = []
mass = []
   
result = identify(img)
if not result:  # if there are no foods identified
    print("No Foods were identified in this image")
    print(0)
    print(0)
    exit()  # nothing to quantify; without this the script would fall through to quantify()
  
mass = quantify(result, img)
for m in mass:  # each food's corresponding fat per gram is added to the list
    fatPerGram.append(nutrients(m[0]['label']))
i = 0
fat = 0
for f in fatPerGram:
    print(mass[i][0]["label"], ": Mass =", round(mass[i][1], 2), "g")
    fat = fat + f * mass[i][1]
    i = i + 1
print(fat)
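
As the docstring notes, the script communicates with the server purely through its command line and its stdout. A minimal sketch of driving it from Python, where main.py and food.jpg are hypothetical names used only for illustration:

import subprocess

# run the script on an image and capture the printed results
completed = subprocess.run(
    ['python', 'main.py', '-i', 'food.jpg'],
    capture_output=True, text=True, check=True)
for line in completed.stdout.splitlines():
    print(line)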
Example #14
def test_basic_identification_3():
    docs_entry = identify('http', '404')
    assert docs_entry.path == PATH.joinpath('http/status/404.html')
    assert docs_entry.entry_id is None
Example #15
def test_favours_exact_partial_match():
    """Given `method` give `method (attribute)` rather than e.g. `meta`"""
    docs_entry = identify('html', 'method')
    assert docs_entry.path == PATH.joinpath('html/attributes.html')
    assert docs_entry.entry_id == 'method-attribute'
Example #16
def map_locations(url=None, file=None, display_map=False):
    """ Main logic of program, perform entire pipeline on the text indicated by the command line arguments given,
        writing each stage of the pipeline to files in the results directory. """

    # exit if neither url nor file given
    if url is None and file is None:
        print("A url or file must be given to read content to process from, see help (-h or --help option) for more "
              "information.")
        exit(1)

    # starting message
    loc = url if file is None else file
    print("Starting map_locations for {}...".format(loc))

    # obtain the content to process
    if file is not None:
        # read content from file
        print("Reading article from file...")
        title = file
        content = utilities.read_from_file(file)

    elif url is not None:
        # make request to Readability API for url
        print("Obtaining article from url...")
        readability_response = readability_interface.readability_request(url)
        title = readability_response['title']
        html_content = readability_response['content']
        content = BeautifulSoup(html_content, 'html.parser').get_text()

    # form results directory for article
    print("Forming results directory for article...")
    results_dir = make_results_dir(title)

    # store content of article
    print("Writing article content to file...")
    content_file = results_dir + '01_content.txt'
    utilities.write_to_file(content_file, content)

    # tag file using Stanford CoreNLP server
    print("Tagging named entities in article...")
    try:
        corenlp_tagged_text = corenlp_interface.corenlp_tag_text(content)
    except ConnectionRefusedError:
        # print the (most likely) reason for the error and quit with the trace;
        # the original called ex.with_traceback(), which requires a traceback
        # argument and does not re-raise
        print("Stanford CoreNLP server must be run to tag named entities! (settings in config.py)")
        raise

    # store tagged article
    print("Writing tagged article to file...")
    corenlp_tagged_file = results_dir + '02_corenlp_tagged.xml'
    utilities.write_to_file(corenlp_tagged_file, corenlp_tagged_text)

    # disambiguate identified locations to find most likely candidate (candidates written to files in disambiguate())
    print("Disambiguating identified locations...")
    identified_locations = identification.identify(corenlp_tagged_text, results_dir)

    # form kml for identified locations
    print("Creating kml for article locations...")
    kml = kml_generation.create_kml(identified_locations)

    print("Writing kml to file...")
    relative_kml_file = '04_kml.kml'
    kml_file = results_dir + relative_kml_file
    utilities.write_to_file(kml_file, kml)

    print("Creating html files for map...")

    # map html file
    with open(config.CONTEXT_DIR + config.MAP_VIEW_TEMPLATE) as template_file:
        template = string.Template(template_file.read())
        html = template.substitute(kml_file=relative_kml_file, title=title)
        map_html_file = results_dir + '05_map_view.html'
        utilities.write_to_file(map_html_file, html)

    # article html file
    with open(config.CONTEXT_DIR + config.ARTICLE_TEMPLATE) as template_file:
        template = string.Template(template_file.read())

        # Form article content html, adding bold tags around identified locations.
        # find positions of all ided locs and add bold tags in reverse order so positions don't shift
        content_html_list = list(content)
        positions = {}
        for ided_loc in identified_locations:
            positions[ided_loc.start] = ided_loc.stop

        start_positions = reversed(sorted(positions.keys()))
        for start_pos in start_positions:
            stop_pos = positions[start_pos]
            content_html_list.insert(stop_pos-1, '</b>')
            content_html_list.insert(start_pos-1, '<b>')

        # replace newlines with paragraphs
        for index, el in enumerate(content_html_list):
            if el == '\n':
                content_html_list[index] = '<p>'

        content_html = ''.join(content_html_list)

        # create and save the html
        html = template.substitute(article_title=title, article_content=content_html)
        article_html_file = results_dir + '06_identified_locs.html'
        utilities.write_to_file(article_html_file, html)

    if display_map:
        print("Opening map...")
        # webbrowser.open_new_tab(article_html_file)
        webbrowser.open_new_tab(map_html_file)

    print("Map: file://" + map_html_file)

    print("map_locations successfully completed for {}.\n".format(loc))
Example #17
import identification as idf
import matplotlib.pyplot as plt
import numpy as np
import visualization as viz
from skimage import draw
import tracking as tr

# load some initial precip data
precip_data = np.load('tutorial_files/precip_1996.npy', allow_pickle=True)

# set a precip threshold and narrow your region of interest
THRESHOLD = 0.6
trimmed_data = np.where(precip_data < THRESHOLD, 0, precip_data)

# create a structural set
struct = np.zeros((16, 16))
rr, cc = draw.disk((7.5, 7.5), radius=8.5)
struct[rr, cc] = 1

# identify your storms
labeled_maps = idf.identify(trimmed_data, struct)

# visualize your data
cmap = plt.get_cmap('hsv')
viz.show_save_storms(labeled_maps,
                     cmap,
                     'Identified Storms 1996',
                     1,
                     show_save='save')
Example #18
def test_basic_identification_1():
    docs_entry = identify('html', 'input')
    assert docs_entry.path == PATH.joinpath('html/element/input.html')
    assert docs_entry.entry_id is None