def get_dict(*keys, **extras):
    """Returns request dict of given keys."""
    _keys = ('url', 'args', 'form', 'data', 'origin', 'headers', 'files', 'json', 'method')

    assert all(map(_keys.__contains__, keys))
    data = request.data
    form = semiflatten(request.form)

    try:
        _json = json.loads(data.decode('utf-8'))
    except (ValueError, TypeError):
        _json = None

    d = dict(
        url=get_url(request),
        args=semiflatten(request.args),
        form=form,
        data=json_safe(data),
        origin=request.headers.get('X-Forwarded-For', request.remote_addr),
        headers=get_headers(),
        files=get_files(),
        json=_json,
        method=request.method,
    )

    out_d = dict()

    for key in keys:
        out_d[key] = d.get(key)

    out_d.update(extras)

    return out_d
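# A minimal sketch of how get_dict might be used from a Flask view, assuming the
# surrounding httpbin-style helpers (request, jsonify, app) are in scope; the route
# name and chosen keys below are illustrative, not part of the original module.
@app.route('/echo', methods=['GET', 'POST'])
def echo():
    return jsonify(get_dict('url', 'args', 'headers', 'origin', 'method'))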
def get(self, product_id=None, **kwargs):
    """Return a product to edit or an empty form to create"""
    template = 'admin/product/new.html'
    files = get_files()
    # print self.form.photo.choices
    context = {
        'files': files,
        'form': self.form,
    }

    # render edit form
    if product_id is not None:
        product = Product.get_by_id(product_id)
        if product:
            self.form = ProductForm(obj=product)
            self.form.tags.data = ', '.join(product.tags)
            product_photo = ''
            if product.photo:
                product_photo = product.photo.key().id()
            context.update({
                'form': self.form,
                'product_photo': product_photo
            })
            template = 'admin/product/edit.html'
        else:
            return redirect('/admin/shop/')

    # render new form
    return self.render_response(template, **context)
def prepare_patient_images(patient_id, intermediate_crop=0):
    file_lst = []
    prefix = str(patient_id).rjust(4, '0')
    src_files = helpers.get_files(settings.BASE_PREPROCESSEDIMAGES_DIR, prefix + "*.png")

    patient_dir = helpers.get_pred_patient_dir(patient_id)
    helpers.create_dir_if_not_exists(patient_dir)
    patient_img_dir = helpers.get_pred_patient_img_dir(patient_id)
    helpers.create_dir_if_not_exists(patient_img_dir)
    helpers.delete_files(patient_img_dir, "*.png")

    dummy = numpy.zeros((settings.TARGET_SIZE, settings.TARGET_SIZE))
    cv2.imwrite(patient_img_dir + "dummy_overlay.png", dummy)

    for src_path in src_files:
        file_name = ntpath.basename(src_path)
        org_img = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
        cropped_img = helpers.prepare_cropped_sax_image(org_img, clahe=True, intermediate_crop=intermediate_crop, rotate=0)
        if SCALE_SIZE is not None:
            cropped_img = cv2.resize(cropped_img, (SCALE_SIZE, SCALE_SIZE), interpolation=cv2.INTER_AREA)
        cv2.imwrite(patient_img_dir + file_name, cropped_img)
        file_lst.append([file_name, "dummy_overlay.png"])

    with open(patient_img_dir + "pred.lst", "wb") as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerows(file_lst)
def test_bodies(input_files, populated_output_dir):
    output_files = helpers.get_files(populated_output_dir)
    for input_file in input_files:
        path = os.path.join(populated_output_dir, "post{}.html".format(input_file.file_number))
        helpers.check_body_text(path, input_file.body)
def test_titles(output_dir):
    files = helpers.get_files(output_dir)

    post1_path = os.path.join(output_dir, files[0])
    helpers.check_title(post1_path, "Post One Title")

    other_post_path = os.path.join(output_dir, files[1])
    helpers.check_title(other_post_path, "Some Other Post Title")
def test_bodies(output_dir):
    files = helpers.get_files(output_dir)

    post1_path = os.path.join(output_dir, files[0])
    helpers.check_body_text(post1_path, "This is the body of Post One.")

    other_post_path = os.path.join(output_dir, files[1])
    helpers.check_body_text(other_post_path, "This is the body of the other post.")
def test_recursive_directory_creation():
    with tempfile.TemporaryDirectory(suffix='-static') as tmpdirname:
        dest = os.path.join(tmpdirname, 'subdir', 'subsubdir')
        result = generate(dest)
        assert result.returncode == 0
        files = helpers.get_files(dest)
        assert files == ['postone.html', 'some-other-post.html']
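# The tests above depend on a helpers.get_files utility that is not shown here. A
# minimal sketch of what it presumably does, assuming it returns the sorted names
# (not full paths) of regular files directly inside the given directory, which is
# what the ordered equality assertions imply.
import os

def get_files(directory):
    """Return a sorted list of regular-file names in `directory`."""
    return sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )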
def test_body(output_dir):
    files = helpers.get_files(output_dir)
    post_path = os.path.join(output_dir, files[0])
    helpers.check_body(
        post_path,
        'This is a post, with a link to <a href="https://google.com">https://google.com</a>.'
    )
def view_post():
    return jsonify(
        args=convert_to_normal_dict(request.args),
        data=json.dumps(request.json),
        files=get_files(),
        form=request.form,
        headers=dict(request.headers),
        json=request.json,
        origin=request.remote_addr,
        url=request.url)
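# convert_to_normal_dict is not defined in this snippet. A plausible sketch, assuming
# it flattens a Werkzeug MultiDict (such as request.args) into a plain dict, keeping
# single values as scalars and repeated keys as lists; the exact behaviour is an
# assumption inferred from the call sites above.
def convert_to_normal_dict(multi_dict):
    """Collapse a MultiDict into a plain dict of scalars or lists."""
    result = {}
    for key in multi_dict:
        values = multi_dict.getlist(key)
        result[key] = values[0] if len(values) == 1 else values
    return result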
def view_anything():
    headers = dict(request.headers)
    headers['Referer'] = request.referrer
    return jsonify(
        args=convert_to_normal_dict(request.args),
        data=json.dumps(request.json),
        files=get_files(),
        form=request.form,
        headers=headers,
        json=request.json,
        method=request.method,
        origin=request.remote_addr,
        url=request.url)
def test_body(output_dir):
    files = helpers.get_files(output_dir)
    post_path = os.path.join(output_dir, files[0])
    body = helpers.get_tag(post_path, 'body')

    # get all child tags
    # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#true
    paragraphs = body.find_all(True)
    assert len(paragraphs) == 3
    for paragraph in paragraphs:
        assert paragraph.name == 'p'

    contents = [p.string.strip() for p in paragraphs]
    assert contents[0] == "This is the first paragraph. It has multiple sentences."
    assert contents[1] == "Then there's another paragraph."
    assert contents[2] == "And another."
def get(self, offer_id=None, **kwargs):
    """Return an offer to edit or an empty form to create"""
    template = 'admin/daily/new.html'
    files = get_files()
    context = {
        'form': self.form,
        'files': files,
    }

    # render edit
    if offer_id is not None:
        offer = Offer.get_by_id(offer_id)
        if offer:
            self.form = OfferForm(obj=offer)
            offer_photo = ''
            if offer.photo:
                offer_photo = offer.photo.key().id()
            context.update({
                'form': self.form,
                'offer_photo': offer_photo
            })
            template = 'admin/daily/edit.html'
        else:
            return redirect('/admin/daily/')

    # render new
    return self.render_response(template, **context)
def main():
    files = get_files()
    print(files)
    key = generate_sym_key()
    encrypt_files(files, key)
    return
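# generate_sym_key and encrypt_files are defined elsewhere in this project. A minimal
# sketch of what such helpers could look like, assuming a Fernet symmetric key from
# the `cryptography` package and in-place encryption of each file; the names and
# behaviour here are assumptions, not the project's actual implementation.
from cryptography.fernet import Fernet

def generate_sym_key():
    """Generate a new Fernet symmetric key."""
    return Fernet.generate_key()

def encrypt_files(files, key):
    """Encrypt each file in `files` in place with the given key."""
    fernet = Fernet(key)
    for path in files:
        with open(path, 'rb') as f:
            plaintext = f.read()
        with open(path, 'wb') as f:
            f.write(fernet.encrypt(plaintext))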
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from helpers import get_files, apply_tweet_tokenizer

if __name__ == "__main__":
    tokenizer_path = '/home/robin/Documents/software/ark-tweet-nlp-0.3.2'
    txt_path = '/home/robin/Documents/phd/tweet-stance-classification/corpus_txt'
    output_path = '/home/robin/Documents/phd/tweet-stance-classification/corpus_tokenized'

    files = get_files(txt_path)
    apply_tweet_tokenizer(files, tokenizer_path, output_path)
def main():
    files = get_files()
    key = generate_sym_key()
    decrypt_files(files, key)
    return
def disambiguate_word(disambiguated_word, context, verbose=False):
    # form the files that are to be searched
    files = helpers.get_files(ngram_freq_folder)
    first_two_letters = disambiguated_word[:2]  # apparently every pair of letters exists
    # search only the relevant files
    # from 'googlebooks-eng-all-2gram-20120701-do.gz_1' to 'do'
    files = [file for file in files
             if file.split('-')[-1].split('.')[0] == first_two_letters]
    # print('Files to be processed:', files)

    # Sense with the highest score is considered best.
    scores = []
    # get every word sense from WordNet
    for sense in wn.synsets(disambiguated_word):
        # print(' ')
        # print(sense)
        examples = []
        for example in sense.examples():
            examples += tokenizer.tokenize(example)
        # print(sense.definition())
        definition = tokenizer.tokenize(sense.definition())

        # get hyponyms and hypernyms for the sense to get larger overlap
        hypernyms = []
        for hypernym in sense.hypernyms():
            hypernyms += tokenizer.tokenize(hypernym.definition())
            for example in hypernym.examples():
                hypernyms += tokenizer.tokenize(example)
        # print('hypernyms: ', hypernyms)

        hyponyms = []
        for hyponym in sense.hyponyms():
            hyponyms += tokenizer.tokenize(hyponym.definition())
            for example in hyponym.examples():
                hyponyms += tokenizer.tokenize(example)
        # print('hyponyms: ', hyponyms)

        # just concatenate the definition and all the examples
        signature = definition + examples + hypernyms + hyponyms
        # print('signature: ', signature)

        # retain the information of the original word in stemming
        # signature format: [(stem1, word1), (stem2, word2)...]
        # stem the words in signature
        signature = [(stemmer.stem(word), word) for word in signature]
        # for index, pair in signature:
        #     for w in stop:
        #         if w == pair[0]:
        #             continue
        #     signature[index] = pair

        # remove defined stopwords based on the stemmed word
        without_stopwords = []  # helper variable
        for pair in signature:
            if pair[0] in stop:
                continue
            else:
                without_stopwords.append(pair)
        signature = without_stopwords
        # print('signature: ', signature)

        # form overlap between the context and the current sense
        overlap = []
        for w1 in context:
            for w2 in signature:  # TODO handle
                if w1[0] == w2[0]:
                    # Append the original non-stemmed word. Words are not stemmed in ngram-coocurence data.
                    overlap.append(w1[1])  # Use context base-word on default.
        # overlap = [word for word in context if word in signature]

        if verbose:
            print('Sense: ', sense, 'Overlap: ', overlap)

        # form score from the overlapped words
        score = 0
        for word in overlap:
            # get occurrences for every word with the disambiguated_word
            # divided by len(signature) not to give more weight because of a lengthy definition
            score = score + get_word_occurrences(word, disambiguated_word, files) / len(signature)
        scores.append([score, sense])
    # print(scores)

    # loop through the scores to get the maximum
    predicted_sense = [0, 'Initial string here']
    for i in scores:
        if i[0] > predicted_sense[0]:
            predicted_sense = i
    # print('Final sense: ', predicted_sense)
    return predicted_sense
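# get_word_occurrences is defined elsewhere in this project. A rough sketch under the
# assumption that each ngram file is a tab-separated Google Books 2-gram extract whose
# first column is the bigram and whose second column is a count; the file layout and
# the default `folder` parameter are assumptions, not the project's actual code.
import os

def get_word_occurrences(word, disambiguated_word, files, folder=ngram_freq_folder):
    """Sum counts of bigrams pairing `word` with `disambiguated_word` across `files`."""
    total = 0
    targets = {disambiguated_word + ' ' + word, word + ' ' + disambiguated_word}
    for file_name in files:
        with open(os.path.join(folder, file_name), encoding='utf-8') as f:
            for line in f:
                columns = line.rstrip('\n').split('\t')
                if columns[0] in targets:
                    total += int(columns[1])
    return total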
def visualize():
    if request.method == 'POST':
        data_form = request.form.get('data')
        if data_form:
            data = json.loads(data_form)
            data = refine_data(data)
            infected_areas = []
            infected_issues = []
            for value in data:
                infected_areas.append(
                    (value[0], disease_map[value[0]], value[1]))
                infected_issues.append(
                    (value[0], organ_map[value[0]], value[1]))
            infected_organs = []
            infected_organs.append(get_most_infected_organs(infected_areas[:]))
            infected_organs.append(get_secondary_system())
            infected_organs.append(get_circulatory_system())
            infected_organs.append(get_primary_endocrine())
            infected_organs.append(get_digestive_system())
            infected_systems = get_ranked_systems(infected_areas[:])
            infected_issues = get_files(infected_issues[:])
            return render_template('organs.html',
                                   infected_areas=infected_areas,
                                   infected_issues=infected_issues,
                                   infected_systems=infected_systems,
                                   infected_organs=infected_organs,
                                   details=details)
        else:
            return render_template('home.html')
    else:
        infected_areas = [
            ('Dopamine:Dopamine', '', 20),
            ('Adrenals:Adrenal_Hypo-function', 'ADRENALS', 20),
            ('Adrenals:Adrenal_Hyper-function', 'ADRENALS', 20),
            ('Parasympathetic_Nervous_System:Parasympathetic_Nervous_System', '', 18),
            ('Thyroid:Hypothyroidism', 'THYROID', 17),
            ('Liver:Liver', 'LIVER', 16),
            ('Parathyroid:Parathyroid', 'PARATHYROID', 14),
            ('Lungs:Lungs', 'LUNGS', 14),
            ('Stomach:Hyperacidity_Dyspepsia', 'STOMACH', 13),
            ('Gaba_Glutamate:Gaba_Glutamate', '', 13),
            ('Kidneys:Kidneys', 'KIDNEYS', 12),
            ('Sympathetic_Nervous_System:Sympathetic_Nervous_System', '', 11),
            ('Small_Intestine:Ilium', 'SMALL_INTESTINE', 11),
            ('Glycemia:Glycemia-Hyper', '', 11),
            ('Thyroid:Hyperthyroidism', 'THYROID', 10),
            ('Small_Intestine:Jejunum', 'SMALL_INTESTINE', 9),
            ('Testes:Testes', 'TESTES', 7),
            ('Pituitary:Pituitary', 'PITUITARY', 7),
            ('Stomach:Hypoacidity', 'STOMACH', 6),
            ('Small_Intestine:Duodenum', 'SMALL_INTESTINE', 5),
            ('Serotonin:Serotonin', '', 3),
            ('Heart:Heart', 'HEART', 3),
            ('Hypothalamus:Hypothalamus', 'HYPOTHALAMUS', 2),
            ('Glycemia:Glycemia-Hypo', '', 0)
        ]
        infected_issues = [
            ('Dopamine:Dopamine', 'Dopamine', 20),
            ('Adrenals:Adrenal_Hypo-function', 'ADRENALS', 20),
            ('Adrenals:Adrenal_Hyper-function', 'ADRENALS', 20),
            ('Parasympathetic_Nervous_System:Parasympathetic_Nervous_System', 'Parasympathetic_Nervous_System', 18),
            ('Thyroid:Hypothyroidism', 'THYROID', 17),
            ('Liver:Liver', 'LIVER', 16),
            ('Parathyroid:Parathyroid', 'PARATHYROID', 14),
            ('Lungs:Lungs', 'LUNGS', 14),
            ('Stomach:Hyperacidity_Dyspepsia', 'STOMACH', 13),
            ('Gaba_Glutamate:Gaba_Glutamate', 'Gaba_Glutamate', 13),
            ('Kidneys:Kidneys', 'KIDNEYS', 12),
            ('Sympathetic_Nervous_System:Sympathetic_Nervous_System', 'Sympathetic_Nervous_System', 11),
            ('Small_Intestine:Ilium', 'SMALL_INTESTINE', 11),
            ('Glycemia:Glycemia-Hyper', 'E', 11),
            ('Thyroid:Hyperthyroidism', 'THYROID', 10),
            ('Small_Intestine:Jejunum', 'SMALL_INTESTINE', 9),
            ('Testes:Testes', 'TESTES', 7),
            ('Pituitary:Pituitary', 'PITUITARY', 7),
            ('Stomach:Hypoacidity', 'STOMACH', 6),
            ('Small_Intestine:Duodenum', 'SMALL_INTESTINE', 5),
            ('Serotonin:Serotonin', 'Serotonin', 3),
            ('Heart:Heart', 'HEART', 3),
            ('Hypothalamus:Hypothalamus', 'HYPOTHALAMUS', 2),
            ('Glycemia:Glycemia-Hypo', 'F', 0)
        ]
        infected_organs = []
        infected_organs.append(get_most_infected_organs(infected_areas[:]))
        infected_organs.append(get_secondary_system())
        infected_organs.append(get_circulatory_system())
        infected_organs.append(get_primary_endocrine())
        infected_organs.append(get_digestive_system())
        infected_systems = get_ranked_systems(infected_areas[:])
        infected_issues = get_files(infected_issues[:])
        return render_template('organs.html',
                               infected_areas=infected_areas,
                               infected_issues=infected_issues,
                               infected_systems=infected_systems,
                               infected_organs=infected_organs,
                               details=details)
    return render_template('home.html')
def test_files(output_dir):
    files = helpers.get_files(output_dir)
    assert files == ['post.html']
def test_files(output_dir):
    files = helpers.get_files(output_dir)
    assert files == ['postone.html', 'some-other-post.html']
def load_train_data(data_dir,
                    id_regex=r'\d{6}_\d{6}_\d{1,3}',
                    x_regex=r'\d{6}_\d{6}_\d{1,3}_image.tif',
                    y_regex=r'\d{6}_\d{6}_\d{1,3}_labels.tif'):
    '''
    Load train data from a directory according to a naming convention

    Parameters
    ----------
    data_dir: str
        Directory containing data
    id_regex: r string
        regex that will be used to extract IDs that will be used to label network output
    x_regex: r string
        regex that represents image file names, complete with extension (tiff please)
    y_regex: r string
        regex that represents training label files, complete with extension (tiff please)

    Returns
    -------
    xs: list of torch.Tensor
        List of images for training
    ys: list of torch.Tensor
        List of affinities for training
    ids: list of str
        ID strings by which each image and label are named.
        Eventually used for correctly labeling network output
    '''
    # Get file names for images and training labels
    x_paths, y_paths = get_files(data_dir, x_regex=x_regex, y_regex=y_regex)

    # Get IDs
    id_pattern = re.compile(id_regex)
    ids = []
    x_paths.sort()
    y_paths.sort()
    for i in range(len(x_paths)):
        xn = Path(x_paths[i]).stem  # this could have been avoided
        yn = Path(y_paths[i]).stem  # why would I bother now though?!
        # assumes there will be a match for each
        xid = id_pattern.search(xn)[0]
        yid = id_pattern.search(yn)[0]
        m = 'There is a mismatch in image and label IDs'
        assert xid == yid, m
        ids.append(xid)

    # Get images and training labels in tensor form
    xs = []
    ys = []
    for i in range(len(x_paths)):
        xp = x_paths[i]
        yp = y_paths[i]
        x = imread(xp)
        x = normalise_data(x)
        y = imread(yp)
        xs.append(torch.from_numpy(x))
        ys.append(torch.from_numpy(y))

    # returns objects in the same manner as get_train_data()
    print('------------------------------------------------------------')
    print(f'Loaded {len(xs)} sets of training data')
    print_labels_info(ys[0].shape)
    return xs, ys, ids
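# The get_files helper used by load_train_data is not shown here. A plausible sketch,
# assuming it walks data_dir and returns two lists of paths whose file names match the
# image and label regexes respectively; the signature is inferred from the call site
# above and is an assumption, not the project's actual implementation.
import os
import re

def get_files(data_dir, x_regex, y_regex):
    """Return (image_paths, label_paths) whose file names match the given regexes."""
    x_pattern, y_pattern = re.compile(x_regex), re.compile(y_regex)
    x_paths, y_paths = [], []
    for root, _, names in os.walk(data_dir):
        for name in names:
            path = os.path.join(root, name)
            if x_pattern.match(name):
                x_paths.append(path)
            elif y_pattern.match(name):
                y_paths.append(path)
    return x_paths, y_paths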
def test_files(input_files, populated_output_dir):
    input_filenames = [
        "post{}.html".format(f.file_number) for f in input_files
    ]
    output_filenames = helpers.get_files(populated_output_dir)
    assert output_filenames == input_filenames
def sum_occurencies(input_folder, files, output_folder='./output/'):
    '''Find all the same entries in the files and sum their occurrences.'''
    for file in files:
        with open(input_folder + file, 'r', encoding='utf-8', errors='ignore') as input_file:
            if not os.path.exists(os.path.dirname(output_folder + file)):
                os.makedirs(os.path.dirname(output_folder + file))
            with open(output_folder + file, 'w', encoding='utf-8') as output_file:
                print(file)
                for line in input_file.readlines():
                    tokens = line.rstrip().split('\t')
                    output_line = ''
                    for word in tokens:
                        output_line += (word + '\t')
                    output_line = output_line[:-1]  # remove the extra \t
                    output_line += '\n'
                    output_file.write(output_line)


if __name__ == "__main__":
    files = helpers.get_files(input_folder)
    remove_pos_tag(input_folder, files, output_folder='D:/data/letters_pos_removed/')