def main():
    """Train, tune and apply the spam/ham classifier.

    Steps, in order:
      1. Load the labelled corpus from ``corpus.txt``.
      2. Draw the training, test and cross-validation samples.
      3. Pick the best ``k`` in 1..9 by accuracy on the cross-validation set.
      4. Report the accuracy of that ``k`` on the test sample.
      5. Classify ``prueba.txt`` and write the result to ``output.txt``.

    All the heavy lifting is delegated to the project modules ``funct`` and
    ``logic``; this function only wires the calls together.
    """
    best_k, best_accuracy = 0, 0.0

    # The data is stored in a matrix, one labelled entry per line.
    matrix = funct.getText("corpus.txt", False)

    # Samples of 80 % (training), 50 % (test) and 100 % (cross validation).
    training = funct.sampleF(matrix, .8)
    test = funct.sampleF(matrix, .5)
    cross_validation = funct.sampleF(matrix, 1)

    # Split the training data by label and collect the words of each label.
    spam, ham = funct.classify(training)
    spam_words = funct.uniqueF(spam)
    ham_words = funct.uniqueF(ham)

    # Total number of words used for training.  Iterate over ``training``
    # itself: the previous version looped over ``range(len(matrix))`` while
    # indexing ``training``, which overruns a sample smaller than the corpus.
    column = [entry[1] for entry in training]
    total_words_training = funct.uniqueF(column)
    vocabulary_size = len(total_words_training)

    # --- Cross-validation phase: choose k ---
    for k in range(1, 10):
        cross_output = logic.generateOutputMatrix(
            cross_validation, k, ham_words, spam_words,
            vocabulary_size, training)
        accuracy = logic.getAccuracy(cross_output, cross_validation)
        if accuracy > best_accuracy:
            best_accuracy, best_k = accuracy, k
    print("\nCROSS VALIADTION: Best K: ", best_k,
          " | Accuracy: ", best_accuracy)

    # --- Test phase ---
    test_output = logic.generateOutputMatrix(
        test, best_k, ham_words, spam_words, vocabulary_size, training)
    test_acc = logic.getAccuracy(test_output, test)
    print("\nTEST: Best K: ", best_k, " | Accuracy: ", test_acc, "\n")

    # --- Input phase ---
    input_file = funct.getText("prueba.txt", True)
    input_output = logic.generateOutputMatrix(
        input_file, best_k, ham_words, spam_words, vocabulary_size, training)
    funct.write(input_output, "output.txt")
def loadBadCh(self):
    """Load the bad-channel record, creating an empty one on first use.

    The record lives in ``<self.pathName>/Artifacts/badChannels.txt``.

    Returns:
        The raw file content (a string) when the file already exists,
        otherwise an empty list after an empty file has been created.
    """
    artifacts_dir = os.path.join(self.pathName, 'Artifacts')
    filename = os.path.join(artifacts_dir, 'badChannels.txt')

    if os.path.exists(filename):
        with open(filename) as f:
            badChannels = f.read()
        # Single-argument call form prints the same on Python 2 and 3.
        print('Bad Channels : {}'.format(badChannels))
    else:
        # Create the directory the file actually lives in (the previous
        # code created a misspelled 'Artifcats' folder, so the open() below
        # failed), and tolerate the directory already being present.
        if not os.path.isdir(artifacts_dir):
            os.makedirs(artifacts_dir)
        with open(filename, 'w') as f:
            f.write('')
        badChannels = []
    return badChannels
def predict(with_embedding, input_prefix=''):
    """Generate 100 sequences with a saved LSTM model and write them to disk.

    Args:
        with_embedding: when truthy, load ``data/out/lstm_model_emb``, build
            the predictor with ``embedding=True`` and write to
            ``./lstm_with_embedding_output.txt``; otherwise load
            ``lstm_model`` and write to ``./lstm_output.txt``.
        input_prefix: text wrapped in ``Text`` and handed to the predictor.

    One sequence is written per line; progress is printed as ``i/100``.
    """
    if with_embedding:
        model_path = 'data/out/lstm_model_emb'
        output_path = "./lstm_with_embedding_output.txt"
    else:
        model_path = 'lstm_model'
        output_path = "./lstm_output.txt"

    model = models.load_model(model_path)
    token2ind, ind2token = text_train.token2ind, text_train.ind2token
    text_prefix = Text(input_prefix, token2ind, ind2token)

    # The embedding branch previously built ``pred_emb`` but then called the
    # undefined name ``pred``; a single ``predictor`` variable removes that.
    if with_embedding:
        predictor = ModelPredict(model, text_prefix, token2ind, ind2token,
                                 max_len, embedding=True)
    else:
        predictor = ModelPredict(model, text_prefix, token2ind, ind2token,
                                 max_len)

    with open(output_path, 'w') as f:
        for idx in range(100):
            print(str(idx + 1) + "/100")
            f.write(predictor.generate_sequence(40, temperature=0.7))
            f.write('\n')
def ui_list(storage):
    """Print every non-empty day of ``storage``.

    For each day index 0..30, ``get_day`` supplies the day's entries.  A day
    with at least one entry is printed as a header, one ``write(...)`` line
    per entry, and a trailing blank block; empty days print nothing.
    """
    for day in range(31):
        entry_count = len(get_day(storage, day))
        if entry_count == 0:
            continue
        print("\nday ", day, "\n---------------------------------------------------")
        for position in range(entry_count):
            print(write(storage, day, position))
        print("\n")
def kSolutions(k: int, obj: list, nr_executii: int, filename):
    """Run a random search ``nr_executii`` times and report the results.

    Each run draws ``k`` candidates from ``validSolution`` and scores them
    with ``fitness``.  After each run the best candidate seen so far, its
    score and the run's start timestamp are recorded; the collected records
    are finally passed to ``functions.write``.

    Args:
        k: number of candidates drawn per run.
        obj: problem description; per the original comment ``obj[0]`` is the
            object list, ``obj[1]`` the maximum value and ``obj[2]`` the
            number of objects.
        nr_executii: number of runs.
        filename: forwarded unchanged to ``functions.write``.
    """
    best_value: int = -1
    best_solution: list = []
    records: list = []

    # NOTE(review): the best solution is not reset between runs, so later
    # runs report the best value found across all runs so far - confirm
    # this is intended.
    for _run in range(nr_executii):
        started_at = time.time()
        for _draw in range(k):
            candidate = validSolution(obj)
            score = fitness(candidate, obj[0], obj[1])
            if score > best_value:
                best_solution, best_value = candidate, score
        records.append([best_solution, best_value, started_at])

    functions.write(filename, k, nr_executii, records)
def infer_profiles(fasta_file, files_dir, dummy_dir="/tmp/", latest=False,
                   n=5, output_file=None, taxons=None, threads=1):
    """Infer profiles for every sequence of a FASTA file.

    Results are collected in a per-process scratch directory and then either
    copied to ``output_file`` or, when no output file is given, passed line
    by line to ``functions.write(None, ...)``.

    Args:
        fasta_file: FASTA file parsed with ``SeqIO.parse``.
        files_dir: directory handed to ``_load_json_files`` and to the
            per-sequence inference function.
        dummy_dir: parent directory for the scratch directory.
        latest: when truthy, sort matrices in descending version order and
            skip a result whose first six matrix-ID characters equal those
            of the previous result.
        n: forwarded to ``infer_SeqRecord_profiles``.
        output_file: destination file, or ``None``.
        taxons: taxon names forwarded to ``infer_SeqRecord_profiles``;
            defaults to fungi, insects, nematodes, plants and vertebrates.
        threads: size of the worker pool.
    """
    # A ``None`` default avoids sharing one mutable list between calls.
    if taxons is None:
        taxons = ["fungi", "insects", "nematodes", "plants", "vertebrates"]

    # Initialize
    base_name = os.path.basename(__file__)
    dummy_dir = os.path.join(dummy_dir, "%s.%s" % (base_name, os.getpid()))
    dummy_file = os.path.join(dummy_dir, "inferred_profiles.tsv")

    # Create dummy dir
    if not os.path.exists(dummy_dir):
        os.makedirs(dummy_dir)

    try:
        # Get sequences as SeqRecords
        # Note: https://biopython.org/wiki/SeqRecord
        seq_records = list(SeqIO.parse(fasta_file, "fasta"))

        # Load JSON files into module globals before the pool is created.
        global domains, jaspar
        domains, jaspar = _load_json_files(files_dir)

        # Write header
        functions.write(
            dummy_file,
            "Query\tTF Name\tTF Matrix\tE-value\tQuery Start-End\tTF Start-End\tDBD %ID")

        # With ``latest`` the numeric part of the matrix ID sorts descending.
        version_sign = -1 if latest else 1

        def sort_key(result):
            # Sort by E-value, TF Name and Matrix
            return (result[3], result[1],
                    version_sign * float(result[2][2:]))

        # Infer SeqRecord profiles
        pool = Pool(threads)
        try:
            parallelization = partial(
                infer_SeqRecord_profiles, files_dir=files_dir,
                dummy_dir=dummy_dir, latest=latest, n=n, taxons=taxons)
            for inference_results in tqdm(
                    pool.imap(parallelization, seq_records),
                    desc="Profile inference", total=len(seq_records)):
                inference_results.sort(key=sort_key)
                for i, result in enumerate(inference_results):
                    # Use the latest version of JASPAR: skip a result whose
                    # matrix-ID prefix matches the previous one.
                    if (latest and i > 0
                            and result[2][:6] == inference_results[i - 1][2][:6]):
                        continue
                    functions.write(dummy_file, "\t".join(map(str, result)))
        finally:
            # Release the workers even when inference fails.
            pool.close()
            pool.join()

        # Write
        if output_file:
            shutil.copy(dummy_file, output_file)
        else:
            with open(dummy_file) as f:
                for line in f:
                    functions.write(None, line.strip("\n"))
    finally:
        # Remove dummy dir, also on failure, so no scratch data is left.
        shutil.rmtree(dummy_dir)
def ocr_pdf_page(path_pdf, save_folder):
    """Convert a PDF to page images and write one CSV per page.

    Each image returned by ``misc.convert_pdf_to_image`` (300 dpi) maps to a
    CSV in ``save_folder`` named after the image with ``.png`` replaced by
    ``.csv``.  Pages whose CSV already exists are reported and left alone;
    the others are converted to grayscale, rotated and segmented by ``ps``,
    written below the ``HEADER`` row and post-processed with
    ``misc.clean_csv``.
    """
    for image_path in misc.convert_pdf_to_image(path_pdf, dpi=300):
        csv_name = os.path.basename(image_path.replace('.png', '.csv'))
        save_path = os.path.join(save_folder, csv_name)
        print("save_path = {}".format(save_path))

        if os.path.exists(save_path):
            # Despite the wording, processing continues with the next page.
            print("{} already exists, returning".format(save_path))
            continue

        page = cv2.imread(image_path)
        grayscale = ps.rotate_image(cv2.cvtColor(page, cv2.COLOR_BGR2GRAY))
        print('Starting processing image ...')
        rows = ps.segment_image(grayscale)

        with open(save_path, 'w') as f:
            f.write('{}\n'.format(','.join(HEADER)))
            f.writelines('{}\n'.format(','.join(row)) for row in rows)
        misc.clean_csv(save_path)
continue # Get chunks # m = 10000; n = len(profile) - 1 chunks = [sequence[i:i+m] for i in range(0, len(sequence), m - n)] # If last chunk is too small, merge it to the previous # if len(chunks[-1]) <= n: last_chunk = chunks.pop(-1); chunks[-1] += last_chunk # For each chunk... # for i in range(len(chunks)): # Initialize # relative_scores = {} chunk_start = i * (m - n) # Remove dummy FASTA file if exist # if os.path.exists(dummy_fasta): os.remove(dummy_fasta) # Create dummy FASTA file # functions.write(dummy_fasta, ">%s\n%s" % (header, chunks[i])) # For each jaspar match... # for chromosome, start, end, strand, relative_score in jaspar_search(pfm_file, dummy_fasta, options.rel_score_thresh): # Add to relative scores # relative_scores.setdefault((start + chunk_start, strand), int(relative_score * 1000)) # For each fimo match... # for chromosome, start, end, strand, p_value in fimo_search(os.path.abspath(options.meme_dir), os.path.join(os.path.abspath(options.profiles_dir), "%s.meme" % options.matrix_id), dummy_fasta, options.p_value_thresh): # If match in relative scores... # if (start + chunk_start, strand) in relative_scores: functions.write(dummy_file, "%s\t%s\t%s\t%s" % (start + chunk_start, strand, relative_scores[(start + chunk_start, strand)], int(log(p_value) * 1000 / -10))) # Remove dummy FASTA file if exist # if os.path.exists(dummy_fasta): os.remove(dummy_fasta) # If dummy file exists... # if os.path.exists(dummy_file): # Compress # functions.compress(dummy_file, output_file)
def cam(data):
    """Hand ``data`` to ``func.write`` and redirect to the ``index`` endpoint."""
    func.write(data)
    index_url = url_for("index")
    return redirect(index_url)
# # If JASPAR bundle... # # if options.jaspar_bundle is not None: # Initialize # dummy_file = os.path.join(os.path.abspath(options.dummy_dir), "bundle.txt") profiles = {} header = [] profile = None # If dummy file exists # if os.path.exists(dummy_file): os.remove(dummy_file) # For each line... # for line in functions.parse_file(os.path.join(options.jaspar_bundle)): # If header... # if line.startswith(">"): # Write # functions.write(dummy_file, line) # ... Else... # else: m = re.search("^\s*(\w)\s*\[(.+)\]\s*$", line) # Write # if m: functions.write( dummy_file, "%s" % " ".join( map(str, [ int(float(i)) for i in re.findall( "[+-]?[0-9]*[.]?[0-9]+", m.group(2)) ]))) # Reformat JASPAR profiles to MEME profiles # process = subprocess.check_output([ os.path.join(os.path.abspath(options.meme_dir), "jaspar2meme"), "-bundle", dummy_file
res.append(item['user']['screen_name']+'\t'+item['id_str']+'\t'+text+'\t'+item['created_at']) for i in ngrams(tokens, 2): if ' '.join([j for j in i]) in A: a_values.append((item['id'], item['created_at'])) res.append(item['user']['screen_name']+'\t'+item['id_str']+'\t'+text+'\t'+item['created_at']) a_values_counter+=len(a_values) b_values_counter+=len(b_values) if len(b_values)>0 or len(a_values)>0: temp['B_values'] = b_values temp['A_values'] = a_values file_name = file_name.split('.',-1)[0] result[file_name] = temp sys.stdout.write('\r%d/%d'%(counter,num_of_files)) print("\nnumber of tweets;", tweets_counter) print("\nnumber of A values: ", a_values_counter) print("number of B values: ", b_values_counter) print("total values: ", a_values_counter+b_values_counter) print("\nfinal A value:", round(a_values_counter/tweets_counter,4)) print("final B value:", round(b_values_counter/tweets_counter,4)) print("final total value:", round((a_values_counter+b_values_counter)/tweets_counter,4)) if save: functions.write_json(result, output_dir+'spelling.json') functions.write(res, output_dir+'spelling.txt')
fr.extend(page) time.sleep(20) temp['friends'] = fr data.append({name: temp}) counter += 1 print('%d/%d \t %s done' % (counter, len(usernames), name)) else: counter += 1 print( '%d/%d \t """%s""" is a private/verified account or has many contats' % (counter, len(usernames), name)) except: counter += 1 print('%d/%d \t """%s""" This account does not exist anymore' % (counter, len(usernames), name)) if save: path = res_dir + username os.mkdir(path) functions.write_json_lst(data, res_dir + username + '_complete.txt') functions.write(usernames, res_dir + username + '_usernames.txt') functions.write(ids, res_dir + username + '_ids.txt') else: print('"""%s""" is a private/verified account or has many contats' % (username)) print("\nDONE")
a = input('1)Press ENTER to roll again with the same bet\n\ 2)enter a for a different bet\n\ 3)enter x to cash your balance\n') elif money == 0: a = 'x' #case2: user change a bet else: bet1, bet2, bet3, tbet = functions.bet(money, tbet) casino_vault += tbet money -= tbet functions.show_bet(bet1, bet2, bet3) money, casino_vault = functions.roll1(casino_vault, money, bet1, bet2, bet3, tbet) print('=' * 70) print('') print('Your balance: ', format(money, ',.2f'), '$', sep='') print('casino vault: ', format(casino_vault, ',.2f'), '$', sep='') print('') a = input('1)Press ENTER to roll again with the same bet\n\ 2)Press a for a different bet\n\ 3)enter x to cash your balance\n') else: a = 'x' functions.ending_chosing(casino_vault, money) functions.write(casino_vault) functions.score_board(money) #call the main function #How to prevent bad input like letters? #improve the prize mechanism
arg = sys.argv directory = os.path.dirname(os.path.realpath(__file__)) config = open(f"{directory}/.config.txt", "r") status = True if config.readlines()[0] == "init" else False if len(arg) == 1: print("\033[1;33mOlá! Bem-vindo ao Header!\033[0;0m") print("\033[1;32mStatus do Header: iniciado\033[0;0m" if status == True else "\033[1;31mStatus do Header: não iniciado\033[0;0m") print("\033[1;32mPara obter ajuda no uso da ferramenta, digite:\033[0;0m") print("python3 header.py help") else: arg.pop(0) if status and arg[0].lower() == "w": print(functions.write(arg)) elif status and arg[0].lower() == "ws": print(functions.write_save(arg, directory)) elif status and arg[0].lower() == "s": print(functions.save(arg, directory)) elif status and arg[0].lower() == "c": print(functions.create(arg, directory)) elif not status and arg[0].lower() == "init": print(functions.init(directory)) elif status and arg[0].lower() == "init": print("Seu Header já foi iniciado!") elif status and arg[0].lower() == "list": functions.list_ws(directory) elif status and arg[0].lower() == "e": print(functions.edit(arg, directory)) elif status and arg[0].lower() == "d":
s_area = f.square_area(3, 4)
print(s_area)

# ---------------- FILE HANDLING ----------------
import os

# Delete the file when present, otherwise report that it is missing.
if os.path.exists("C://python notes//aa.txt"):
    os.remove("C://python notes\\aa.txt")
else:
    print("file does not exist")

# Every handle is opened in a ``with`` block so it is flushed and closed
# before the file is reopened; previously the append handle was left open,
# so the data it wrote was not guaranteed to be on disk for the next read.
with open("C://python notes\\a.txt", "w") as f:
    f.write("adding some data to the file. ")

with open("C://python notes//a.txt", "r") as f:
    print(f.readline())

with open("C://python notes//a.txt", "a") as f:
    f.write("Adding some more data!\n yes.")

with open("C://python notes//a.txt", "r") as f:
    print(f.readline())

# Remove the (empty) directory.
os.rmdir("C://python notes//AA")
print(ans) break # get game info homeTeamId = read(path + 'homeTeamId.txt') awayTeamId = read(path + 'awayTeamId.txt') if gameId != read(path + 'gameId.txt'): gameId = read(path + 'gameId.txt') print('Current game: {}'.format(read(path + 'gameName.txt'))) # get time remaining try: clockRunning = data[gameId]['clockRunning'] timeRemaining = data[gameId]['timeRemaining'] # write time remaining to file write(format_timeRemaining(timeRemaining), path + 'time.txt') except (KeyError, NameError): pass # get score try: write(data[gameId]['currentTeamScore'][homeTeamId], path + 'homeScore.txt') except (KeyError, NameError): pass try: write(data[gameId]['currentTeamScore'][awayTeamId], path + 'awayScore.txt') except (KeyError, NameError): pass
def scan_profile(input_file, pwmscan_dir, output_dir="./",
                 background=(0.25, 0.25, 0.25, 0.25), p_value_thresh=0.05,
                 rel_score_thresh=0.8, dummy_dir="/tmp/", profile=None):
    """Scan a DNA sequence file for matches to ``profile`` using PWMScan.

    The profile is written in PWMScan format, ``matrix_prob`` is run to get
    the score distribution and the score cutoff, and ``matrix_scan`` output
    is piped through awk and gzip into ``<output_dir>/<matrix_id>.bed.gz``.

    Args:
        input_file: DNA sequence file to scan.
        pwmscan_dir: directory containing ``matrix_prob`` and ``matrix_scan``.
        output_dir: directory receiving the gzipped BED file.
        background: A, C, G, T background frequencies, in that order.
        p_value_thresh: a score qualifies as cutoff when its p-value is
            below this threshold ...
        rel_score_thresh: ... and its percentage is at least this fraction
            times 100.
        dummy_dir: directory for the temporary bed/pwm/tsv files.
        profile: motif object; its ``matrix_id``, ``name`` and ``pssm`` are
            read and its ``background``/``pseudocounts`` are set.

    Raises:
        ValueError: when the score distribution cannot be calculated or the
            scan cannot be run (including when no score meets the cutoff
            criteria).
    """
    # Initialize
    prefix = "%s.%s.%s" % (
        os.path.basename(__file__), os.getpid(), profile.matrix_id)
    bed_file = os.path.join(dummy_dir, "%s.bed" % prefix)
    pwm_file = os.path.join(dummy_dir, "%s.pwm" % prefix)
    tsv_file = os.path.join(dummy_dir, "%s.tsv" % prefix)
    output_file = os.path.join(output_dir, "%s.bed.gz" % profile.matrix_id)
    cutoff = None

    # Add background
    profile.background = dict(zip("ACGT", background[:4]))

    # Add JASPAR pseudocounts
    profile.pseudocounts = motifs.jaspar.calculate_pseudocounts(profile)

    # Convert to PWMScan format
    for i in range(len(profile.pssm["A"])):
        functions.write(
            pwm_file,
            "\t".join([str(int(profile.pssm[j][i] * 100)) for j in "ACGT"]))

    # Calculate distribution of matrix scores
    try:
        # universal_newlines=True yields text, so the output can be split
        # with a str separator on Python 3 as well.
        process = subprocess.check_output(
            [os.path.join(pwmscan_dir, "matrix_prob"), pwm_file],
            stderr=subprocess.STDOUT, universal_newlines=True)
        for line in process.split("\n"):
            m = re.search(r"(\S+)\s+(\S+)\s+(\S+)%", line)
            if not m:
                continue
            score = m.group(1)
            p_value = float(m.group(2))
            perc = float(m.group(3))
            functions.write(
                tsv_file,
                "%s\t%s\t%s" % (score, int(perc * 10),
                                int(log(p_value) * 1000 / -10)))
            # Use the thresholds passed to this function; the previous code
            # read them from a global ``options`` object, ignoring the
            # arguments.  The last qualifying score wins.
            if p_value < p_value_thresh and perc >= rel_score_thresh * 100:
                cutoff = score
    except Exception:
        raise ValueError("Could not calculate distribution of matrix scores!")

    # Scan DNA sequence for TFBS matches
    if cutoff is None:
        # No score met the thresholds; previously this surfaced as the same
        # ValueError through an unbound ``cutoff``.
        raise ValueError("Could not scan DNA sequence file for TFBS matches!")
    try:
        # NOTE(review): the command is assembled by string formatting and run
        # through a shell; paths containing spaces or shell metacharacters
        # will break it - quote or restructure if inputs are untrusted.
        bash_command = '''%s -m %s -c %s %s | awk -v score_tab="%s" -v name="%s" 'BEGIN { while((getline line < score_tab) > 0 ) {split(line,f," "); scores[f[1]]=f[2]; pvalues[f[1]]=f[3]} close(score_tab) } {print $1"\t"$2"\t"$3"\t"name"\t"scores[$5]"\t"pvalues[$5]"\t"$6}' | gzip > %s''' % (
            os.path.join(pwmscan_dir, "matrix_scan"), pwm_file, cutoff,
            os.path.abspath(input_file), tsv_file, profile.name, bed_file)
        subprocess.call(bash_command, shell=True, stderr=subprocess.STDOUT)
    except Exception:
        raise ValueError("Could not scan DNA sequence file for TFBS matches!")

    # Write output
    shutil.copy(bed_file, output_file)

    # Remove files
    os.remove(bed_file)
    os.remove(pwm_file)
    os.remove(tsv_file)
if options.format == "csv": delimiter = "," if options.output_file is None: dummy_file = None else: dummy_file = os.path.join(os.path.abspath(options.dummy_dir), "%s.txt" % os.getpid()) rel_score_thresh = int(options.rel_score_thresh * 1000) # transform relative score threshold p_value_thresh = int(log(options.p_value_thresh) * 1000 / -10) # transform p-value threshold # Remove dummy file if exist # if dummy_file is not None: if os.path.exists(dummy_file): os.remove(dummy_file) # Write # if options.format != "bed": header = delimiter.join(["chr", "start (1-based)", "end"]) if options.scores == "rel_score": header += delimiter + "rel_score * 1000" elif options.scores == "p_value": header += delimiter + "p_value" else: header += delimiter + "rel_score * 1000" + delimiter + "-1 * log10(p_value) * 100" # Write # functions.write(dummy_file, header + delimiter + "strand") # For each matrix id and for each chr file... # for file_name in os.listdir(os.path.abspath(options.input_dir)): # Initialize # m = re.search("^(MA\d+\.\d)\.(chr\S+)\.tab\.gz$", file_name) if not m: continue matrix_id = m.group(1) chromosome = m.group(2) # Skip file if wrong matrix id # if options.matrix_id is not None: if matrix_id not in options.matrix_id.split(","): continue # Skip file if wrong chromosome # if options.chr is not None: if chromosome not in options.chr.split(","): continue # If no profile for matrix id... # if matrix_id not in profiles:
def make_files(out_dir=os.path.dirname(os.path.realpath(__file__))):
    """Build the JSON, FASTA and BLAST+ support files in ``out_dir``.

    For every taxon in the module-level ``taxons``:
      * ``<taxon>.profiles.json`` - CORE JASPAR profiles (matrix ID -> name);
      * ``<taxon>.uniprot.json``  - UniProt accession -> [profiles, sequence];
      * ``<taxon>.fa``            - sequences, indexed with ``makeblastdb``.
    Then, once:
      * ``cisbp.json``   - sequence digest -> [TF family, domains], parsed
        from the Cis-BP 1.02 SQL dumps;
      * ``domains.json`` and ``jaspar.json`` - per-accession domains and
        JASPAR profiles.

    A file that already exists is not rebuilt, except that ``domains.json``
    and ``jaspar.json`` are both rebuilt when either one is missing.

    Args:
        out_dir: output directory; defaults to the directory of this module.

    Raises:
        ValueError: when JASPAR profiles or UniProt sequences cannot be
            fetched, or the BLAST+ database cannot be created.
    """
    # Initialize
    cwd = os.getcwd()
    codec = coreapi.codecs.CoreJSONCodec()
    uniprot = UniProt(verbose=False, cache=False)
    # UniProt accessions forced for matrices handled as "bugged cases".
    uniprot_fixes = {
        "MA0328.1": "P0CY08",
        "MA0110.1": "P46667",
        "MA0058.1": "P61244",
        "MA0046.1": "P20823",
        "MA0098.1": "P14921",
        "MA0052.1": "Q02078",
        "MA0024.1": "Q01094",
        "MA0138.1": "Q13127",
    }

    # Create output dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # For each taxon...
    for taxon in taxons:

        # Skip if taxon profiles JSON file already exists
        profiles_json_file = os.path.join(out_dir, "%s.profiles.json" % taxon)
        if not os.path.exists(profiles_json_file):
            try:
                profiles = {}
                client = coreapi.Client()
                response = client.get(
                    "http://jaspar.genereg.net/api/v1/taxon/%s/" % taxon)
                json_obj = json.loads(codec.encode(response))
                # Walk every page, the last one included.
                while True:
                    for profile in json_obj["results"]:
                        # Keep CORE collection profiles only
                        if profile["collection"] == "CORE":
                            profiles.setdefault(
                                profile["matrix_id"], profile["name"])
                    if json_obj["next"] is None:
                        break
                    response = client.get(json_obj["next"])
                    json_obj = json.loads(codec.encode(response))
                # Write
                functions.write(profiles_json_file, json.dumps(
                    profiles, sort_keys=True, indent=4,
                    separators=(",", ": ")))
            except Exception:
                raise ValueError(
                    "Could not fetch %s profiles from JASPAR" % taxon)

        # Skip if taxon uniprot JSON file already exists
        uniprot_json_file = os.path.join(out_dir, "%s.uniprot.json" % taxon)
        if not os.path.exists(uniprot_json_file):
            try:
                uniaccs = {}
                client = coreapi.Client()
                # Load JSON file
                with open(profiles_json_file) as f:
                    profiles = json.load(f)
                # For each profile...
                for profile in sorted(profiles):
                    # Get profile detailed info
                    response = client.get(
                        "http://jaspar.genereg.net/api/v1/matrix/%s/" % profile)
                    json_obj = json.loads(codec.encode(response))
                    # Fix bugged cases
                    fixed = uniprot_fixes.get(json_obj["matrix_id"])
                    if fixed is not None:
                        json_obj["uniprot_ids"] = [fixed]
                    # For each UniProt Accession...
                    for uniacc in json_obj["uniprot_ids"]:
                        uniacc = uniacc.strip(" ")
                        uniaccs.setdefault(uniacc, [[], None])
                        if profile not in uniaccs[uniacc][0]:
                            uniaccs[uniacc][0].append(profile)
                # For each UniProt Accession, get the UniProt sequence
                for uniacc in uniaccs:
                    uniaccs[uniacc][1] = uniprot.get_fasta_sequence(uniacc)
                # Write
                functions.write(uniprot_json_file, json.dumps(
                    uniaccs, sort_keys=True, indent=4,
                    separators=(",", ": ")))
            except Exception:
                raise ValueError(
                    "Could not fetch %s sequences from UniProt" % taxon)

        # Skip if taxon FASTA file already exists
        fasta_file = os.path.join(out_dir, "%s.fa" % taxon)
        if not os.path.exists(fasta_file):
            # Load JSON file
            with open(uniprot_json_file) as f:
                uniaccs = json.load(f)
            # For each UniProt Accession, write one FASTA record
            for uniacc in sorted(uniaccs):
                functions.write(
                    fasta_file, ">%s\n%s" % (uniacc, uniaccs[uniacc][1]))
            # Create BLAST+ db
            try:
                subprocess.check_output(
                    ["makeblastdb", "-in", fasta_file, "-dbtype", "prot"],
                    stderr=subprocess.STDOUT)
            except Exception:
                raise ValueError(
                    "Could not create BLAST+ database: %s" % fasta_file)

    # Skip if Cis-BP JSON file already exists
    cisbp_json_file = os.path.join(out_dir, "cisbp.json")
    if not os.path.exists(cisbp_json_file):
        # Initialize
        proteins = {}
        prot_features = {}
        tfs = {}
        tf_families = {}

        # Create Cis-BP dir
        cisbp_dir = os.path.join(out_dir, "cisbp")
        if not os.path.exists(cisbp_dir):
            os.makedirs(cisbp_dir)

        # Change dir; always return to the original one, even on failure.
        os.chdir(cisbp_dir)
        try:
            # Skip if TFs file already exists
            if not os.path.exists("cisbp_1.02.tfs.sql"):
                # Download SQL files
                os.system("curl --silent -O http://cisbp.ccbr.utoronto.ca/data/1.02/DataFiles/SQLDumps/SQLArchive_cisbp_1.02.zip")
                # Unzip
                os.system("unzip -qq SQLArchive_cisbp_1.02.zip")
                # Remove SQL files
                os.remove("SQLArchive_cisbp_1.02.zip")
                # For each ZIP file...
                for zip_file in frozenset(os.listdir(os.getcwd())):
                    # Skip non-zip files
                    if not zip_file.endswith(".zip"):
                        continue
                    # Unzip
                    os.system("unzip -qq %s" % zip_file)
                    os.remove(zip_file)
        finally:
            # Return to original dir
            os.chdir(cwd)

        # Get protein features
        with open(os.path.join(cisbp_dir, "cisbp_1.02.prot_features.sql")) as f:
            for line in f:
                m = re.search(
                    r"\('.+', '(.+)', '.+', \d+, \d+, '(.+)'\)", line)
                if m:
                    prot_features.setdefault(m.group(1), set())
                    prot_features[m.group(1)].add(m.group(2))

        # Get TFs
        with open(os.path.join(cisbp_dir, "cisbp_1.02.tfs.sql")) as f:
            for line in f:
                m = re.search(
                    r"\('(.+)', '(.+)', '.+', '.+', '.+', '.+', '.+'\)", line)
                if m:
                    tfs.setdefault(m.group(1), m.group(2))

        # Get TF families
        with open(os.path.join(cisbp_dir, "cisbp_1.02.tf_families.sql")) as f:
            for line in f:
                m = re.search(r"\('(.+)', '.+', '.+', \d+, (.+)\)", line)
                if m:
                    tf_families.setdefault(m.group(1), m.group(2))

        # Get proteins
        with open(os.path.join(cisbp_dir, "cisbp_1.02.proteins.sql")) as f:
            for line in f:
                m = re.search(
                    r"\('(.+)', '(.+)', '.+', '.+', '([A-Z]+)\W*'\)", line)
                if not m:
                    continue
                if m.group(1) not in prot_features:
                    continue
                # Digest to MD5
                md5 = _sequence_digest(m.group(3))
                proteins.setdefault(
                    md5, [tf_families[tfs[m.group(2)]], []])
                # For each domain...
                for domain in prot_features[m.group(1)]:
                    if domain not in proteins[md5][1]:
                        proteins[md5][1].append(domain)

        # Write
        functions.write(cisbp_json_file, json.dumps(
            proteins, sort_keys=True, indent=4, separators=(",", ": ")))

        # Remove Cis-BP dir
        shutil.rmtree(cisbp_dir)

    # Skip if JSON files already exist
    domains_json_file = os.path.join(out_dir, "domains.json")
    jaspar_json_file = os.path.join(out_dir, "jaspar.json")
    if not os.path.exists(domains_json_file) or not os.path.exists(jaspar_json_file):
        # Initialize
        domains = {}
        jaspar = {}

        # Load JSON file
        with open(cisbp_json_file) as f:
            cisbp = json.load(f)

        # Remove JSON files.  The previous code only evaluated the file
        # names (a no-op), so a stale file survived the rebuild;
        # functions.write is called once per record for the FASTA file and
        # therefore presumably appends, which would corrupt the JSON.
        for stale_file in (domains_json_file, jaspar_json_file):
            if os.path.exists(stale_file):
                os.remove(stale_file)

        # For each taxon...
        for taxon in taxons:
            # Load JSON files
            profiles_json_file = os.path.join(
                out_dir, "%s.profiles.json" % taxon)
            with open(profiles_json_file) as f:
                profiles = json.load(f)
            uniprot_json_file = os.path.join(
                out_dir, "%s.uniprot.json" % taxon)
            with open(uniprot_json_file) as f:
                uniaccs = json.load(f)

            # For each UniProt Accession...
            for uniacc in sorted(uniaccs):
                sequence = uniaccs[uniacc][1]
                # Skip if no sequence
                if sequence is None:
                    continue
                # Digest to MD5
                md5 = _sequence_digest(sequence)
                # If sequence in Cis-BP, add to domains
                if md5 in cisbp:
                    domains.setdefault(
                        uniacc, [cisbp[md5][1], cisbp[md5][0]])
                # NOTE(review): the original indentation was lost; this loop
                # is assumed to run for every accession with a sequence, not
                # only for those found in Cis-BP - confirm.
                for profile in uniaccs[uniacc][0]:
                    # Add to JASPAR
                    jaspar.setdefault(uniacc, [])
                    jaspar[uniacc].append([profile, profiles[profile]])

        # Write
        functions.write(domains_json_file, json.dumps(
            domains, sort_keys=True, indent=4, separators=(",", ": ")))
        functions.write(jaspar_json_file, json.dumps(
            jaspar, sort_keys=True, indent=4, separators=(",", ": ")))


def _sequence_digest(sequence):
    """Return the MD5 hex digest of ``sequence`` plus its first and last four characters."""
    h = hashlib.new("md5")
    h.update(sequence.encode("utf-8"))
    return h.hexdigest() + sequence[:4] + sequence[-4:]
grams = [] for j in ngrams(tokens, 2): grams.append(' '.join([x for x in j])) for gram in grams: if gram == i: idx = grams.index(gram) if idx < len(tags) - 2: if tags[idx + 2][1] in ['VB', 'VBP']: id_date.append( (item['id_str'], item['created_at'])) res.append(item['user']['screen_name'] + '\t' + item['id_str'] + '\t' + text + '\t' + item['created_at']) instance_counter += len(id_date) if len(id_date) > 0: file_name = file_name.split('.', -1)[0] result[file_name] = id_date sys.stdout.write('\r%d/%d' % (counter, num_of_files)) print("\nnumber of tweets;", tweets_counter) print("found instaces: ", instance_counter) print("final value:", round(instance_counter / tweets_counter, 4)) if save: functions.write_json(result, output_dir + 'lexico-grammar(' + verb[0] + ').json') functions.write(res, output_dir + 'lexico-grammar(' + verb[0] + ').txt')
def main():
    """Run the interactive slot-machine session.

    The player starts with a balance of 1,000,000 and chooses a multiplier.
    After every roll the answer ``a`` decides what happens next: an empty
    answer rolls again with the same multiplier, ``'x'`` cashes out, and
    anything else is validated as a new multiplier.  The session also ends
    when the casino vault is not positive or the player has no money left.
    On exit the vault is persisted and the score board is shown.
    """
    # NOTE(review): the original literals used backslash line continuations;
    # any indentation on the continued lines would have been part of the
    # prompt text - confirm against the original file.
    roll_prompt = ('1)Press ENTER to roll again\n'
                   '2)enter new multiplier\n'
                   '3)enter x to cash your balance\n')
    broke_prompt = ('You do not have enough money for a roll\n'
                    '1)enter a new multiplier\n'
                    '2)enter x to exit\n')

    def play_round(stake, balance, vault):
        """Pay the stake, roll, settle the prize and print the balance."""
        balance -= stake
        vault += stake
        print('==========================')
        prize = functions.roll2(stake, vault)
        balance += prize
        vault -= prize
        print('')
        print('Your balance: ', format(balance, ',.2f'), '$', sep='')
        print('')
        return balance, vault

    casino_vault = functions.read()
    # ``a`` is the sentinel telling the loop which case applies.
    a = ''
    functions.instruction_slot(casino_vault)
    money = 1000000
    default_multiplier = 1
    answer = input('Enter your multiplier(DEFAULT 1):\n')
    multiplier = functions.valid2(answer, default_multiplier, money)

    while a != 'x':
        if not casino_vault > 0:
            # The vault is empty: end the session.
            a = 'x'
        elif a != '':
            # Case 3: the player entered a new multiplier.
            multiplier = functions.valid2(a, default_multiplier, money)
            money, casino_vault = play_round(multiplier, money, casino_vault)
            a = input(roll_prompt)
        elif money >= multiplier:
            # Case 1: roll again with the current multiplier.
            money, casino_vault = play_round(multiplier, money, casino_vault)
            a = input(roll_prompt)
        elif money == 0:
            # Case 2a: out of money.
            a = 'x'
        else:
            # Case 2b: the multiplier is too big for the balance.
            option = input(broke_prompt)
            if option == 'x':
                a = 'x'
            else:
                multiplier = functions.valid2(
                    option, default_multiplier, money)

    functions.ending_chosing(casino_vault, money)
    functions.write(casino_vault)
    functions.score_board(money)
# 8. Show end time tz_Sydney = pytz.timezone('Australia/Sydney') datetime_Sydney_end = datetime.now(tz_Sydney) print("\nYou finished at: ", datetime_Sydney_end.strftime("%H:%M:%S")) # 9. Calculate and show total time spent in program end_time = datetime.now() duration = end_time - start_time duration_in_seconds = duration.total_seconds() minutes = round(duration_in_seconds / 60, 2) # Log in time in time_log file f = open("time_log", "a") datetime_Sydney = str(datetime_Sydney) f.write(f"\n{datetime_Sydney}") f.close() # 10. Log goal in goal_log file f = open("goal_log", "a") today = date.today() datetime_Sydney = str(datetime_Sydney) f.write(f"\n {today}: {today_goal.capitalize()}") f.close() # 11. Print time spent in program print(f"\nYou spent {minutes} minutes logged on today.\n") # 12. Exit message print( f"Remember your goal for today: {today_goal.capitalize()}. Have a great day!"
bathrooms = 2 garages = 2 #UV = "$499,000" #EER = "2.3" #blocksize = 788 housesize = 50 #------------- new_line = new_feature.format(node,propertytype,name,url,address,price,bedrooms,bathrooms,garages,UV,EER,blocksize,housesize,timestamp,longitude,latitude) # if a == len(address_list)-1: # output = output + new_line # if the feature is the last don't add a comma # else: output = output + new_line + "\n,\n" print '-------------------' # remove last line? output = output + postamble #print output # write geojson output to file f = open("addresses.geojson","w+") f.write(output) f.close()
t = [] for item in data: if len(item['entities']['hashtags']) != 0: for h in item['entities']['hashtags']: if h['text'].lower() in pattern: t.append((item['id_str'], item['created_at'], h['text'].lower())) hashtag_counter += 1 text = item['full_text'] text = re.sub(r"http\S+", " ", text) text = re.sub(r"\s+", " ", text) res.append(item['user']['screen_name'] + '\t' + item['id_str'] + '\t' + text + '\t' + item['created_at']) if len(t) > 0: file_name = file_name.split('.', -1)[0] result[file_name] = t sys.stdout.write('\r%d/%d' % (counter, num_of_files)) print("\nnumber of tweets;", tweets_counter) print("found hashtags: ", hashtag_counter) print("final value:", round(hashtag_counter / tweets_counter, 5)) if save: functions.write_json(result, output_dir + 'lexis(' + pattern[0] + ').json') functions.write(res, output_dir + 'lexis(' + pattern[0] + ').txt')
subscribe['apiCommand'] = "subscribe" subscribe['apiKey'] = read('apiKey.txt') subscribe['requestId'] = "test connection" subscribe['eventId'] = read('eventId.txt') # setup WebSocket ws = websocket.WebSocket() # start ws connection ws.connect(url) # subscribe to feed ws.send(json.dumps(subscribe)) # initialize files write('Home', path + 'homeTeamName.txt') write('Away', path + 'awayTeamName.txt') reset_score() # main loop while (True): # get data ans = ws.recv() try: data = json.loads(ans)['data'] except KeyError: print(ans) break # get team info try:
def save_to_file(file_name, time, data):
    """Append one ``time<TAB>data`` line to a text file.

    ``.txt`` is added to ``file_name`` when it is missing, and the file is
    opened in append mode under the ``c.SAVE_LOCATION`` prefix.
    """
    if not file_name.endswith(".txt"):
        file_name = file_name + ".txt"
    target = c.SAVE_LOCATION + file_name
    with open(target, "a") as f:
        record = "{}\t{}\n".format(time, data)
        f.write(record)