Example #1
def get_dict(*keys, **extras):
    """Returns request dict of given keys."""

    _keys = ('url', 'args', 'form', 'data', 'origin', 'headers', 'files',
             'json', 'method')

    assert all(map(_keys.__contains__, keys))
    data = request.data
    form = semiflatten(request.form)

    try:
        _json = json.loads(data.decode('utf-8'))
    except (ValueError, TypeError):
        _json = None

    d = dict(
        url=get_url(request),
        args=semiflatten(request.args),
        form=form,
        data=json_safe(data),
        origin=request.headers.get('X-Forwarded-For', request.remote_addr),
        headers=get_headers(),
        files=get_files(),
        json=_json,
        method=request.method,
    )

    out_d = dict()

    for key in keys:
        out_d[key] = d.get(key)

    out_d.update(extras)

    return out_d
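
For orientation, a minimal sketch of how a helper like get_dict is typically exposed from a Flask view. The app object, route, and field selection below are assumptions for illustration, not part of the original snippet:

from flask import Flask, jsonify

app = Flask(__name__)  # hypothetical app; the snippet above only shows the helper

@app.route('/get', methods=['GET'])
def view_get():
    # Build the response dict from the current request and serialise it.
    return jsonify(get_dict('url', 'args', 'headers', 'origin'))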
Example #2
def get(self, product_id=None, **kwargs):
    """Return a product to edit or an empty form to create"""
    template = 'admin/product/new.html'
    files = get_files()
    #print self.form.photo.choices
    context = {
        'files': files,
        'form': self.form,
    }

    # render edit form
    if product_id is not None:
        product = Product.get_by_id(product_id)
        if product:
            self.form = ProductForm(obj=product)
            self.form.tags.data = ', '.join(product.tags)
            product_photo = ''
            if product.photo:
                product_photo = product.photo.key().id()
            context.update({ 'form': self.form, 'product_photo': product_photo })
            template = 'admin/product/edit.html'
        else:
            return redirect('/admin/shop/')
    # render new form
    return self.render_response(template, **context)
Example #3
def prepare_patient_images(patient_id, intermediate_crop=0):
    file_lst = []
    prefix = str(patient_id).rjust(4, '0')
    src_files = helpers.get_files(settings.BASE_PREPROCESSEDIMAGES_DIR, prefix + "*.png")

    patient_dir = helpers.get_pred_patient_dir(patient_id)
    helpers.create_dir_if_not_exists(patient_dir)
    patient_img_dir = helpers.get_pred_patient_img_dir(patient_id)
    helpers.create_dir_if_not_exists(patient_img_dir)
    helpers.delete_files(patient_img_dir, "*.png")

    dummy = numpy.zeros((settings.TARGET_SIZE, settings.TARGET_SIZE))
    cv2.imwrite(patient_img_dir + "dummy_overlay.png", dummy)

    for src_path in src_files:
        file_name = ntpath.basename(src_path)
        org_img = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
        cropped_img = helpers.prepare_cropped_sax_image(org_img, clahe=True, intermediate_crop=intermediate_crop, rotate=0)
        if SCALE_SIZE is not None:
            cropped_img = cv2.resize(cropped_img, (SCALE_SIZE, SCALE_SIZE), interpolation=cv2.INTER_AREA)

        cv2.imwrite(patient_img_dir + file_name, cropped_img)
        file_lst.append([file_name, "dummy_overlay.png"])

    # Note: opening in binary mode ("wb") for csv.writer is a Python 2 idiom;
    # under Python 3 the file would need to be opened in text mode with newline="".
    with open(patient_img_dir + "pred.lst", "wb") as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerows(file_lst)
Example #4
def test_bodies(input_files, populated_output_dir):
    output_files = helpers.get_files(populated_output_dir)

    for input_file in input_files:
        path = os.path.join(populated_output_dir,
                            "post{}.html".format(input_file.file_number))
        helpers.check_body_text(path, input_file.body)
Example #5
def test_titles(output_dir):
    files = helpers.get_files(output_dir)

    potst1path = os.path.join(output_dir, files[0])
    helpers.check_title(potst1path, "Post One Title")

    otherpostpath = os.path.join(output_dir, files[1])
    helpers.check_title(otherpostpath, "Some Other Post Title")
Example #6
def test_bodies(output_dir):
    files = helpers.get_files(output_dir)

    potst1path = os.path.join(output_dir, files[0])
    helpers.check_body_text(potst1path, "This is the body of Post One.")

    otherpostpath = os.path.join(output_dir, files[1])
    helpers.check_body_text(otherpostpath, "This is the body of the other post.")
Example #7
def test_recursive_directory_creation():
    with tempfile.TemporaryDirectory(suffix='-static') as tmpdirname:
        dest = os.path.join(tmpdirname, 'subdir', 'subsubdir')
        result = generate(dest)
        assert result.returncode == 0

        files = helpers.get_files(dest)
        assert files == ['postone.html', 'some-other-post.html']
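
These tests treat helpers.get_files as returning an alphabetically sorted list of file names in a directory. The real helper is not shown; a minimal sketch consistent with the assertions above might look like:

import os

def get_files(directory):
    # Return the names of regular files in `directory`, sorted so that
    # index-based assertions such as files[0] are deterministic.
    return sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )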
Example #8
def test_body(output_dir):
    files = helpers.get_files(output_dir)

    post_path = os.path.join(output_dir, files[0])
    helpers.check_body(
        post_path,
        'This is a post, with a link to <a href="https://google.com">https://google.com</a>.'
    )
Example #9
def view_post():
    return jsonify(
        args=convert_to_normal_dict(request.args),
        data=json.dumps(request.json),
        files=get_files(),
        form=request.form,
        headers=dict(request.headers),
        json=request.json,
        origin=request.remote_addr,
        url=request.url)
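
convert_to_normal_dict is not shown in the snippet; presumably it flattens Werkzeug's MultiDict (request.args) into a plain, JSON-serialisable dict. A sketch under that assumption:

def convert_to_normal_dict(multi_dict):
    # Keep only the first value per key; a MultiDict can hold repeated keys.
    return {key: multi_dict.get(key) for key in multi_dict.keys()}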
Example #10
def view_anything():
    headers = dict(request.headers)
    headers['Referer'] = request.referrer
    return jsonify(
        args=convert_to_normal_dict(request.args),
        data=json.dumps(request.json),
        files=get_files(),
        form=request.form,
        headers=headers,
        json=request.json,
        method=request.method,
        origin=request.remote_addr,
        url=request.url)
Example #11
def test_body(output_dir):
    files = helpers.get_files(output_dir)
    post_path = os.path.join(output_dir, files[0])
    body = helpers.get_tag(post_path, 'body')
    # get all child tags
    # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#true
    paragraphs = body.find_all(True)
    assert len(paragraphs) == 3

    for paragraph in paragraphs:
        assert paragraph.name == 'p'

    contents = [p.string.strip() for p in paragraphs]
    assert contents[0] == "This is the first paragraph. It has multiple sentences."
    assert contents[1] == "Then there's another paragraph."
    assert contents[2] == "And another."
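
The test above relies on a helpers.get_tag utility and, per the linked BeautifulSoup docs, on find_all(True) returning every child tag. The helper itself is not shown; a plausible sketch using BeautifulSoup would be:

from bs4 import BeautifulSoup

def get_tag(path, tag_name):
    # Parse the generated HTML file and return the first tag with that name.
    with open(path, encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')
    return soup.find(tag_name)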
Example #12
def get(self, offer_id=None, **kwargs):
    """Return an offer to edit or an empty form to create"""
    template = 'admin/daily/new.html'
    files = get_files()
    context = {
        'form': self.form,
        'files': files,
    }
    # render edit
    if offer_id is not None:
        offer = Offer.get_by_id(offer_id)
        # check the offer exists before touching offer.photo
        if offer:
            self.form = OfferForm(obj=offer)
            offer_photo = ''
            if offer.photo:
                offer_photo = offer.photo.key().id()
            context.update({ 'form': self.form, 'offer_photo': offer_photo })
            template = 'admin/daily/edit.html'
        else:
            return redirect('/admin/daily/')
    # render new
    return self.render_response(template, **context)
Example #13
def main():
    files = get_files()
    print(files)
    key = generate_sym_key()
    encrypt_files(files, key)
    return
Example #14
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from helpers import get_files, apply_tweet_tokenizer

if __name__ == "__main__":
    tokenizer_path = '/home/robin/Documents/software/ark-tweet-nlp-0.3.2'
    txt_path = '/home/robin/Documents/phd/tweet-stance-classification/corpus_txt'
    output_path = '/home/robin/Documents/phd/tweet-stance-classification/corpus_tokenized'
    files = get_files(txt_path)
    apply_tweet_tokenizer(files, tokenizer_path, output_path)
Example #15
def main():
    files = get_files()
    key = generate_sym_key()
    decrypt_files(files, key)
    return
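
Examples #13 and #15 call get_files, generate_sym_key, encrypt_files, and decrypt_files without showing them. One plausible sketch, assuming the cryptography package's Fernet primitive rather than the original implementation, is below; note that decrypt_files can only succeed if generate_sym_key returns the same key used for encryption, e.g. by persisting it to disk:

import os
from cryptography.fernet import Fernet

KEY_PATH = 'sym.key'  # hypothetical location; the original code does not show one

def generate_sym_key():
    # Reuse a persisted key if present, otherwise create and store a new one.
    if os.path.exists(KEY_PATH):
        with open(KEY_PATH, 'rb') as f:
            return f.read()
    key = Fernet.generate_key()
    with open(KEY_PATH, 'wb') as f:
        f.write(key)
    return key

def encrypt_files(files, key):
    fernet = Fernet(key)
    for path in files:
        with open(path, 'rb') as f:
            data = f.read()
        with open(path, 'wb') as f:
            f.write(fernet.encrypt(data))

def decrypt_files(files, key):
    fernet = Fernet(key)
    for path in files:
        with open(path, 'rb') as f:
            data = f.read()
        with open(path, 'wb') as f:
            f.write(fernet.decrypt(data))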
Example #16
def disambiguate_word(disambiguated_word, context, verbose=False):
    # form the files that are to be searched
    files = helpers.get_files(ngram_freq_folder)
    first_two_letters = disambiguated_word[:2]  # apparently every pair of letters exists
    # search only the relevant files
    # from 'googlebooks-eng-all-2gram-20120701-do.gz_1' to 'do'
    files = [file for file in files
             if file.split('-')[-1].split('.')[0] == first_two_letters]
    # print('Files to be processed:', files)

    # Sense with the highest score is considered best.
    scores = []
    # get every word sense from WordNet
    for sense in wn.synsets(disambiguated_word):
        #print(' ')
        #print(sense)
        examples = []
        for example in sense.examples():
            examples += tokenizer.tokenize(example)
        # print(sense.definition())
        definition = tokenizer.tokenize(sense.definition())

        # get hyponyms and hypernyms for the sense to get larger overlap

        hypernyms = []
        for hypernym in sense.hypernyms():
            hypernyms += tokenizer.tokenize(hypernym.definition())
            for example in hypernym.examples():
                hypernyms += tokenizer.tokenize(example)

        #print('hypernyms: ', hypernyms)

        hyponyms = []
        for hyponym in sense.hyponyms():
            hyponyms += tokenizer.tokenize(hyponym.definition())
            for example in hyponym.examples():
                hyponyms += tokenizer.tokenize(example)

        #print('hyponyms: ', hyponyms)

        # just concatenate the definition and all the examples
        signature = definition + examples + hypernyms + hyponyms
        #print('signature: ', signature)

        # retain the information of the original word in stemming
        # signature format: [(stem1, word1), (stem2, word2)...]
        # stem the words in signature
        signature = [(stemmer.stem(word), word) for word in signature]


        #for index, pair in signature:
        #    for w in stop:
        #        if w == pair[0]:
        #            continue
        #    signature[index] = pair

        # remove defined stopwords based on the stemmed word
        without_stopwords = []  # helper variable
        for pair in signature:
            if pair[0] in stop:
                continue
            else:
                without_stopwords.append(pair)

        signature = without_stopwords

        #print('signature: ', signature)

        # form overlap between the context and the current sense
        overlap = []
        for w1 in context:
            for w2 in signature:
                # TODO handle
                if w1[0] == w2[0]:
                    # Append the original non-stemmed word. Words are not stemmed in ngram-coocurence data.
                    overlap.append(w1[1])  # Use context base-word on default.

        #overlap = [word for word in context if word in signature]

        if verbose:
            print('Sense: ', sense, 'Overlap: ', overlap)

        # form score from the overlapped words
        score = 0
        for word in overlap:
            # get occurrences for every word with the disambiguated_word
            # divided by len(signature) not to give more weight because of a lengthy definition
            score = score + get_word_occurrences(word, disambiguated_word, files) / len(signature)

        scores.append([score, sense])

        # print(scores)

    # loop through the scores to get the maximum
    predicted_sense = [0, 'Initial string here']
    for i in scores:
        if i[0] > predicted_sense[0]:
            predicted_sense = i

    #print('Final sense: ', predicted_sense)

    return predicted_sense
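
Inside disambiguate_word both the context and the sense signature are lists of (stem, original_word) pairs, and the module-level tokenizer, stemmer, stop, and ngram_freq_folder are assumed to be configured elsewhere. A minimal usage sketch under those assumptions (the NLTK tokenizer and stemmer names are illustrative, not taken from the original module):

from nltk.tokenize import RegexpTokenizer
from nltk.stem import PorterStemmer

tokenizer = RegexpTokenizer(r'\w+')
stemmer = PorterStemmer()

# Context words are stored as (stem, original word) pairs, matching the
# signature format used inside disambiguate_word.
sentence = "I deposited the check at the bank yesterday"
context = [(stemmer.stem(w), w) for w in tokenizer.tokenize(sentence)]

score, sense = disambiguate_word('bank', context, verbose=True)
print(sense, score)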
Example #17
def visualize():
    if request.method == 'POST':
        data_form = request.form.get('data')
        if data_form:
            data = json.loads(data_form)
            data = refine_data(data)
            infected_areas = []
            infected_issues = []
            for value in data:
                infected_areas.append(
                    (value[0], disease_map[value[0]], value[1]))
                infected_issues.append(
                    (value[0], organ_map[value[0]], value[1]))

            infected_organs = []
            infected_organs.append(get_most_infected_organs(infected_areas[:]))
            infected_organs.append(get_secondary_system())
            infected_organs.append(get_circulatory_system())
            infected_organs.append(get_primary_endocrine())
            infected_organs.append(get_digestive_system())
            infected_systems = get_ranked_systems(infected_areas[:])

            infected_issues = get_files(infected_issues[:])
            return render_template('organs.html',
                                   infected_areas=infected_areas,
                                   infected_issues=infected_issues,
                                   infected_systems=infected_systems,
                                   infected_organs=infected_organs,
                                   details=details)
        else:
            return render_template('home.html')
    else:
        infected_areas = [
            ('Dopamine:Dopamine', '', 20),
            ('Adrenals:Adrenal_Hypo-function', 'ADRENALS', 20),
            ('Adrenals:Adrenal_Hyper-function', 'ADRENALS', 20),
            ('Parasympathetic_Nervous_System:Parasympathetic_Nervous_System',
             '', 18), ('Thyroid:Hypothyroidism', 'THYROID', 17),
            ('Liver:Liver', 'LIVER', 16),
            ('Parathyroid:Parathyroid', 'PARATHYROID', 14),
            ('Lungs:Lungs', 'LUNGS', 14),
            ('Stomach:Hyperacidity_Dyspepsia', 'STOMACH', 13),
            ('Gaba_Glutamate:Gaba_Glutamate', '', 13),
            ('Kidneys:Kidneys', 'KIDNEYS', 12),
            ('Sympathetic_Nervous_System:Sympathetic_Nervous_System', '', 11),
            ('Small_Intestine:Ilium', 'SMALL_INTESTINE', 11),
            ('Glycemia:Glycemia-Hyper', '', 11),
            ('Thyroid:Hyperthyroidism', 'THYROID', 10),
            ('Small_Intestine:Jejunum', 'SMALL_INTESTINE', 9),
            ('Testes:Testes', 'TESTES', 7),
            ('Pituitary:Pituitary', 'PITUITARY', 7),
            ('Stomach:Hypoacidity', 'STOMACH', 6),
            ('Small_Intestine:Duodenum', 'SMALL_INTESTINE', 5),
            ('Serotonin:Serotonin', '', 3), ('Heart:Heart', 'HEART', 3),
            ('Hypothalamus:Hypothalamus', 'HYPOTHALAMUS', 2),
            ('Glycemia:Glycemia-Hypo', '', 0)
        ]

        infected_issues = [
            ('Dopamine:Dopamine', 'Dopamine', 20),
            ('Adrenals:Adrenal_Hypo-function', 'ADRENALS', 20),
            ('Adrenals:Adrenal_Hyper-function', 'ADRENALS', 20),
            ('Parasympathetic_Nervous_System:Parasympathetic_Nervous_System',
             'Parasympathetic_Nervous_System', 18),
            ('Thyroid:Hypothyroidism', 'THYROID', 17),
            ('Liver:Liver', 'LIVER', 16),
            ('Parathyroid:Parathyroid', 'PARATHYROID', 14),
            ('Lungs:Lungs', 'LUNGS', 14),
            ('Stomach:Hyperacidity_Dyspepsia', 'STOMACH', 13),
            ('Gaba_Glutamate:Gaba_Glutamate', 'Gaba_Glutamate', 13),
            ('Kidneys:Kidneys', 'KIDNEYS', 12),
            ('Sympathetic_Nervous_System:Sympathetic_Nervous_System',
             'Sympathetic_Nervous_System', 11),
            ('Small_Intestine:Ilium', 'SMALL_INTESTINE', 11),
            ('Glycemia:Glycemia-Hyper', 'E', 11),
            ('Thyroid:Hyperthyroidism', 'THYROID', 10),
            ('Small_Intestine:Jejunum', 'SMALL_INTESTINE', 9),
            ('Testes:Testes', 'TESTES', 7),
            ('Pituitary:Pituitary', 'PITUITARY', 7),
            ('Stomach:Hypoacidity', 'STOMACH', 6),
            ('Small_Intestine:Duodenum', 'SMALL_INTESTINE', 5),
            ('Serotonin:Serotonin', 'Serotonin', 3),
            ('Heart:Heart', 'HEART', 3),
            ('Hypothalamus:Hypothalamus', 'HYPOTHALAMUS', 2),
            ('Glycemia:Glycemia-Hypo', 'F', 0)
        ]

        infected_organs = []
        infected_organs.append(get_most_infected_organs(infected_areas[:]))
        infected_organs.append(get_secondary_system())
        infected_organs.append(get_circulatory_system())
        infected_organs.append(get_primary_endocrine())
        infected_organs.append(get_digestive_system())
        infected_systems = get_ranked_systems(infected_areas[:])

        infected_issues = get_files(infected_issues[:])
        return render_template('organs.html',
                               infected_areas=infected_areas,
                               infected_issues=infected_issues,
                               infected_systems=infected_systems,
                               infected_organs=infected_organs,
                               details=details)

    return render_template('home.html')
Example #18
def test_files(output_dir):
    files = helpers.get_files(output_dir)
    assert files == ['post.html']
Example #19
def test_files(output_dir):
    files = helpers.get_files(output_dir)
    assert files == ['postone.html', 'some-other-post.html']
Example #20
def load_train_data(data_dir,
                    id_regex=r'\d{6}_\d{6}_\d{1,3}',
                    x_regex=r'\d{6}_\d{6}_\d{1,3}_image.tif',
                    y_regex=r'\d{6}_\d{6}_\d{1,3}_labels.tif'):
    '''
    Load train data from a directory according to a naming convention

    Parameters
    ----------
    data_dir: str
        Directory containing data
    id_regex: r string
        regex that will be used to extract IDs that will
        be used to label network output
    x_regex: r string
        regex that represents image file names, complete with
        extension (tiff please)
    y_regex: r string
        regex that represents training label files, complete
        with extension (tiff please)

    Returns
    -------
    xs: list of torch.Tensor
        List of images for training 
    ys: list of torch.Tensor
        List of affinities for training
    ids: list of str
        ID strings by which each image and label are named.
        Eventually used for correctly labeling network output
    
    '''
    # Get file names for images and training labels
    x_paths, y_paths = get_files(data_dir, x_regex=x_regex, y_regex=y_regex)
    # Get IDs
    id_pattern = re.compile(id_regex)
    ids = []
    x_paths.sort()
    y_paths.sort()
    for i in range(len(x_paths)):
        xn = Path(x_paths[i]).stem  # this could have been avoided
        yn = Path(y_paths[i]).stem  # why would I bother now though?!
        # assumes there will be a match for each
        xid = id_pattern.search(xn)[0]
        yid = id_pattern.search(yn)[0]
        m = 'There is a mismatch in image and label IDs'
        assert xid == yid, m
        ids.append(xid)
    # Get images and training labels in tensor form
    xs = []
    ys = []
    for i in range(len(x_paths)):
        xp = x_paths[i]
        yp = y_paths[i]
        x = imread(xp)
        x = normalise_data(x)
        y = imread(yp)
        xs.append(torch.from_numpy(x))
        ys.append(torch.from_numpy(y))
    # returns objects in the same manner as get_train_data()
    print('------------------------------------------------------------')
    print(f'Loaded {len(xs)} sets of training data')
    print_labels_info(ys[0].shape)
    return xs, ys, ids
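
A short usage sketch; the data directory is a placeholder and load_train_data is the function defined above:

xs, ys, ids = load_train_data('data/train')
print(f'first ID: {ids[0]}, image shape: {tuple(xs[0].shape)}, labels shape: {tuple(ys[0].shape)}')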
Example #21
def test_files(input_files, populated_output_dir):
    input_filenames = [
        "post{}.html".format(f.file_number) for f in input_files
    ]
    output_filenames = helpers.get_files(populated_output_dir)
    assert output_filenames == input_filenames
Example #22

def sum_occurencies(input_folder, files, output_folder='./output/'):
    ''' Find all the same entries in the files and sum their occurrences.'''
    for file in files:
        with open(input_folder + file, 'r', encoding='utf-8',
                  errors='ignore') as input_file:
            if not os.path.exists(os.path.dirname(output_folder + file)):
                os.makedirs(os.path.dirname(output_folder + file))
            with open(output_folder + file, 'w',
                      encoding='utf-8') as output_file:
                print(file)
                for line in input_file.readlines():
                    tokens = line.rstrip().split('\t')
                    output_line = ''
                    for word in tokens:

                        output_line += (word + '\t')

                    output_line = output_line[:-1]  # remove the trailing \t
                    output_line += '\n'
                    output_file.write(output_line)


if __name__ == "__main__":
    files = helpers.get_files(input_folder)

    remove_pos_tag(input_folder,
                   files,
                   output_folder='D:/data/letters_pos_removed/')