Esempio n. 1
0
def main():
    """Train, tune and apply the spam/ham classifier built on ``corpus.txt``.

    Steps performed:

    1. Load the labeled corpus and draw training, test and
       cross-validation samples from it.
    2. Try every ``k`` in ``1..9`` on the cross-validation sample and keep
       the one with the highest accuracy.
    3. Report the accuracy of that ``k`` on the test sample.
    4. Classify the entries of ``prueba.txt`` and write the result to
       ``output.txt``.
    """
    best_k, best_accuracy = 0, 0.0

    # The corpus is loaded as a matrix with one labeled entry per line.
    matrix = funct.getText("corpus.txt", False)

    # Samples drawn with fractions 0.8 (training), 0.5 (test) and
    # 1 (cross validation) of the loaded data.
    training = funct.sampleF(matrix, .8)
    test = funct.sampleF(matrix, .5)
    cross_validation = funct.sampleF(matrix, 1)

    # Split the training data by label and collect the words of each label.
    spam, ham = funct.classify(training)
    spam_words = funct.uniqueF(spam)
    ham_words = funct.uniqueF(ham)

    # Collect the second column of every training row. Iterate over
    # ``training`` itself: it is a sample of ``matrix``, so indexing it with
    # ``range(len(matrix))`` could run past its end.
    column = [row[1] for row in training]
    total_words_training = funct.uniqueF(column)
    vocabulary_size = len(total_words_training)

    # --Cross-validation phase: pick the best k---
    for k in range(1, 10):
        salida_cross = logic.generateOutputMatrix(cross_validation, k,
                                                  ham_words, spam_words,
                                                  vocabulary_size,
                                                  training)
        accuracy = logic.getAccuracy(salida_cross, cross_validation)
        if accuracy > best_accuracy:
            best_accuracy, best_k = accuracy, k
    print("\nCROSS VALIADTION: Best K: ", best_k, " | Accuracy: ",
          best_accuracy)

    # --Test phase---
    salida_test = logic.generateOutputMatrix(test, best_k, ham_words,
                                             spam_words,
                                             vocabulary_size,
                                             training)
    test_acc = logic.getAccuracy(salida_test, test)
    print("\nTEST: Best K: ", best_k, " | Accuracy: ", test_acc, "\n")

    # --Input phase---
    input_file = funct.getText("prueba.txt", True)
    salida_input = logic.generateOutputMatrix(input_file, best_k, ham_words,
                                              spam_words,
                                              vocabulary_size,
                                              training)
    funct.write(salida_input, "output.txt")
Esempio n. 2
0
 def loadBadCh(self):
     """Load the bad-channel list stored in ``<pathName>/Artifacts/badChannels.txt``.

     When the file exists its raw text content is printed and returned.
     Otherwise the ``Artifacts`` directory is created if needed, an empty
     ``badChannels.txt`` is written, and an empty list is returned.

     Returns:
         str or list: the file content as a string, or ``[]`` when the
         file had to be created.
     """
     artifacts_dir = os.path.join(self.pathName, 'Artifacts')
     filename = os.path.join(artifacts_dir, 'badChannels.txt')
     if os.path.exists(filename):
         with open(filename) as f:
             badChannels = f.read()
         # Single-argument call form prints the same text on Python 2 and 3.
         print('Bad Channels : {}'.format(badChannels))
     else:
         # Create the same directory the file lives in; it may already
         # exist even though the file itself is missing.
         if not os.path.isdir(artifacts_dir):
             os.makedirs(artifacts_dir)
         with open(filename, 'w') as f:
             f.write('')
         badChannels = []
     return badChannels
Esempio n. 3
0
def predict(with_embedding, input_prefix=''):
    """Generate 100 sequences with a saved LSTM model and write them to a file.

    Args:
        with_embedding: When true, load ``data/out/lstm_model_emb``, build
            the predictor with ``embedding=True`` and write to
            ``./lstm_with_embedding_output.txt``. Otherwise load
            ``lstm_model`` and write to ``./lstm_output.txt``.
        input_prefix: Text used to build the ``Text`` prefix object handed
            to the predictor.
    """
    if with_embedding:
        model_path = 'data/out/lstm_model_emb'
        output_path = "./lstm_with_embedding_output.txt"
        predictor_options = {'embedding': True}
    else:
        model_path = 'lstm_model'
        output_path = "./lstm_output.txt"
        predictor_options = {}

    model = models.load_model(model_path)

    token2ind, ind2token = text_train.token2ind, text_train.ind2token

    text_prefix = Text(input_prefix, token2ind, ind2token)

    # A single predictor name is used for both variants, so the generation
    # loop below always refers to the object that was just built.
    pred = ModelPredict(model, text_prefix, token2ind, ind2token, max_len,
                        **predictor_options)

    with open(output_path, 'w') as f:
        for idx in range(100):
            # Progress indicator on stdout.
            print(str(idx + 1) + "/100")
            f.write(pred.generate_sequence(40, temperature=0.7))
            f.write('\n')
Esempio n. 4
0
def ui_list(storage):
    """Print the entries of every non-empty day with index 0 to 30.

    Each non-empty day gets a header line, one line per entry (formatted by
    ``write``) and a trailing blank block; empty days print nothing.
    """
    for day in range(31):
        entry_count = len(get_day(storage, day))
        if entry_count == 0:
            continue
        print("\nday ", day, "\n---------------------------------------------------")
        for position in range(entry_count):
            print(write(storage, day, position))
        print("\n")
Esempio n. 5
0
def kSolutions(k: int, obj: list, nr_executii: int, filename):
    """Sample random valid solutions and record the best one after each run.

    For each of the ``nr_executii`` runs, ``k`` candidates are drawn with
    ``validSolution`` and scored with ``fitness``. The best candidate and
    score are carried over from one run to the next (they are not reset),
    and after every run ``[best solution, best score, run start time]`` is
    appended to the results passed to ``functions.write``.
    """
    # obj[0] - list of objects, obj[1] - maximum value, obj[2] - number of objects

    best_score: int = -1
    best_solution: list = []
    runs: list = []

    for _run in range(nr_executii):
        started_at = time.time()
        for _attempt in range(k):
            candidate = validSolution(obj)
            score = fitness(candidate, obj[0], obj[1])
            if score > best_score:
                best_solution, best_score = candidate, score

        runs.append([best_solution, best_score, started_at])

    functions.write(filename, k, nr_executii, runs)
Esempio n. 6
0
def infer_profiles(fasta_file, files_dir, dummy_dir="/tmp/", latest=False, n=5,
    output_file=None, taxons=None, threads=1):
    """Infer profiles for every sequence of a FASTA file and report them as TSV.

    Each sequence is handed to ``infer_SeqRecord_profiles`` through a pool
    of ``threads`` worker processes. Results are sorted by E-value, TF name
    and matrix, written to a temporary TSV file, and finally either copied
    to ``output_file`` or emitted line by line via
    ``functions.write(None, ...)``.

    Args:
        fasta_file: Path to the FASTA file with the query sequences.
        files_dir: Directory passed to ``_load_json_files`` and to the
            per-sequence inference.
        dummy_dir: Parent directory for the per-process temporary directory.
        latest: When true, matrices are sorted in descending version order
            and, of consecutive results sharing the first six characters
            of the matrix ID, only the first is kept.
        n: Forwarded unchanged to ``infer_SeqRecord_profiles``.
        output_file: Destination path; when falsy, results are emitted via
            ``functions.write(None, ...)`` (presumably stdout).
        taxons: Taxon names forwarded to the inference; defaults to fungi,
            insects, nematodes, plants and vertebrates.
        threads: Number of worker processes.
    """
    global domains, jaspar

    # Build the default per call instead of sharing one mutable list object.
    if taxons is None:
        taxons = ["fungi", "insects", "nematodes", "plants", "vertebrates"]

    # Initialize
    base_name = os.path.basename(__file__)
    dummy_dir = os.path.join(dummy_dir, "%s.%s" % (base_name, os.getpid()))
    dummy_file = os.path.join(dummy_dir, "inferred_profiles.tsv")

    # Create dummy dir
    if not os.path.exists(dummy_dir):
        os.makedirs(dummy_dir)

    try:
        # Get sequences as SeqRecords
        # Note: https://biopython.org/wiki/SeqRecord
        seq_records = list(SeqIO.parse(fasta_file, "fasta"))

        # Load JSON files
        domains, jaspar = _load_json_files(files_dir)

        # Write header
        functions.write(dummy_file,
            "Query\tTF Name\tTF Matrix\tE-value\tQuery Start-End\tTF Start-End\tDBD %ID")

        # With ``latest`` the matrix version is sorted in descending order.
        version_sign = -1.0 if latest else 1.0

        # Infer SeqRecord profiles
        pool = Pool(threads)
        try:
            parallelization = partial(infer_SeqRecord_profiles, files_dir=files_dir,
                dummy_dir=dummy_dir, latest=latest, n=n, taxons=taxons)
            for inference_results in tqdm(pool.imap(parallelization, iter(seq_records)),
                desc="Profile inference", total=len(seq_records)):
                # Sort by E-value, TF Name and Matrix
                inference_results.sort(
                    key=lambda x: (x[3], x[1], version_sign * float(x[2][2:])))
                # For each inference...
                previous_base_id = None
                for result in inference_results:
                    # Use the latest version of JASPAR: skip a result whose
                    # base matrix ID equals that of the preceding result.
                    base_id = result[2][:6]
                    is_repeat = latest and base_id == previous_base_id
                    previous_base_id = base_id
                    if is_repeat:
                        continue
                    # Write
                    functions.write(dummy_file, "\t".join(map(str, result)))
            pool.close()
        except BaseException:
            # Do not leave worker processes running after a failure.
            pool.terminate()
            raise
        finally:
            pool.join()

        # Write
        if output_file:
            shutil.copy(dummy_file, output_file)
        else:
            with open(dummy_file) as f:
                # For each line...
                for line in f:
                    functions.write(None, line.strip("\n"))
    finally:
        # Remove dummy dir, also when something above failed
        shutil.rmtree(dummy_dir)
Esempio n. 7
0
def ocr_pdf_page(path_pdf, save_folder):
    """Convert a PDF to page images and write one CSV per page into ``save_folder``.

    Pages whose CSV already exists are left untouched. For the others the
    image is read, converted to grayscale, passed through
    ``ps.rotate_image`` and ``ps.segment_image``, and the resulting rows are
    written under the ``HEADER`` line before ``misc.clean_csv`` is applied.
    """
    for image_path in misc.convert_pdf_to_image(path_pdf, dpi=300):
        csv_name = os.path.basename(image_path.replace('.png', '.csv'))
        save_path = os.path.join(save_folder, csv_name)
        print("save_path = {}".format(save_path))

        # Guard clause: nothing to do for pages that were already processed.
        if os.path.exists(save_path):
            print("{} already exists, returning".format(save_path))
            continue

        grayscale = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
        grayscale = ps.rotate_image(grayscale)

        print('Starting processing image ...')
        rows = ps.segment_image(grayscale)

        with open(save_path, 'w') as out:
            out.write('{}\n'.format(','.join(HEADER)))
            for row in rows:
                out.write('{}\n'.format(','.join(row)))
        misc.clean_csv(save_path)
Esempio n. 8
0
         continue
 # Get chunks #
 m = 10000;
 n = len(profile) - 1
 chunks = [sequence[i:i+m] for i in range(0, len(sequence), m - n)]
 # If last chunk is too small, merge it to the previous #
 if len(chunks[-1]) <= n: last_chunk = chunks.pop(-1); chunks[-1] += last_chunk
 # For each chunk... #
 for i in range(len(chunks)):
     # Initialize #
     relative_scores = {}
     chunk_start = i * (m - n)
     # Remove dummy FASTA file if exist #
     if os.path.exists(dummy_fasta): os.remove(dummy_fasta)
     # Create dummy FASTA file #
     functions.write(dummy_fasta, ">%s\n%s" % (header, chunks[i]))
     # For each jaspar match... #
     for chromosome, start, end, strand, relative_score in jaspar_search(pfm_file, dummy_fasta, options.rel_score_thresh):
         # Add to relative scores #
         relative_scores.setdefault((start + chunk_start, strand), int(relative_score * 1000))
     # For each fimo match... #
     for chromosome, start, end, strand, p_value in fimo_search(os.path.abspath(options.meme_dir), os.path.join(os.path.abspath(options.profiles_dir), "%s.meme" % options.matrix_id), dummy_fasta, options.p_value_thresh):
         # If match in relative scores... #
         if (start + chunk_start, strand) in relative_scores:
             functions.write(dummy_file, "%s\t%s\t%s\t%s" % (start + chunk_start, strand, relative_scores[(start + chunk_start, strand)], int(log(p_value) * 1000 / -10)))
     # Remove dummy FASTA file if exist #
     if os.path.exists(dummy_fasta): os.remove(dummy_fasta)
 # If dummy file exists... #
 if os.path.exists(dummy_file):
     # Compress #
     functions.compress(dummy_file, output_file)
Esempio n. 9
0
def cam(data):
    """Pass ``data`` to ``func.write`` and redirect to the ``index`` endpoint."""
    func.write(data)
    index_url = url_for("index")
    return redirect(index_url)
#    # If JASPAR bundle... #
#    if options.jaspar_bundle is not None:
# Initialize #
    dummy_file = os.path.join(os.path.abspath(options.dummy_dir), "bundle.txt")
    profiles = {}
    header = []
    profile = None
    # If dummy file exists #
    if os.path.exists(dummy_file): os.remove(dummy_file)
    # For each line... #
    for line in functions.parse_file(os.path.join(options.jaspar_bundle)):
        # If header... #
        if line.startswith(">"):
            # Write #
            functions.write(dummy_file, line)
        # ... Else... #
        else:
            m = re.search("^\s*(\w)\s*\[(.+)\]\s*$", line)
            # Write #
            if m:
                functions.write(
                    dummy_file, "%s" % " ".join(
                        map(str, [
                            int(float(i)) for i in re.findall(
                                "[+-]?[0-9]*[.]?[0-9]+", m.group(2))
                        ])))
    # Reformat JASPAR profiles to MEME profiles #
    process = subprocess.check_output([
        os.path.join(os.path.abspath(options.meme_dir), "jaspar2meme"),
        "-bundle", dummy_file
Esempio n. 11
0
                res.append(item['user']['screen_name']+'\t'+item['id_str']+'\t'+text+'\t'+item['created_at'])
    
        for i in ngrams(tokens, 2):
            if ' '.join([j for j in i]) in A:
                a_values.append((item['id'], item['created_at']))
                res.append(item['user']['screen_name']+'\t'+item['id_str']+'\t'+text+'\t'+item['created_at'])
    
    a_values_counter+=len(a_values)
    b_values_counter+=len(b_values)
    
    if len(b_values)>0 or len(a_values)>0:
        temp['B_values'] = b_values
        temp['A_values'] = a_values
        file_name = file_name.split('.',-1)[0]
        result[file_name] = temp
    sys.stdout.write('\r%d/%d'%(counter,num_of_files))

print("\nnumber of tweets;", tweets_counter)

print("\nnumber of A values: ", a_values_counter)
print("number of B values: ", b_values_counter)
print("total values: ", a_values_counter+b_values_counter)

print("\nfinal A value:", round(a_values_counter/tweets_counter,4))
print("final B value:", round(b_values_counter/tweets_counter,4))
print("final total value:", round((a_values_counter+b_values_counter)/tweets_counter,4))

if save:
    functions.write_json(result, output_dir+'spelling.json')
    functions.write(res, output_dir+'spelling.txt')
Esempio n. 12
0
                    fr.extend(page)
                    time.sleep(20)

                temp['friends'] = fr

                data.append({name: temp})
                counter += 1
                print('%d/%d \t %s done' % (counter, len(usernames), name))

            else:
                counter += 1
                print(
                    '%d/%d \t """%s""" is a private/verified account or has many contats'
                    % (counter, len(usernames), name))
        except:
            counter += 1
            print('%d/%d \t """%s""" This account does not exist anymore' %
                  (counter, len(usernames), name))

    if save:
        path = res_dir + username
        os.mkdir(path)
        functions.write_json_lst(data, res_dir + username + '_complete.txt')
        functions.write(usernames, res_dir + username + '_usernames.txt')
        functions.write(ids, res_dir + username + '_ids.txt')
else:
    print('"""%s""" is a private/verified account or has many contats' %
          (username))

print("\nDONE")
Esempio n. 13
0
            a = input('1)Press ENTER to roll again with the same bet\n\
2)enter a for a different bet\n\
3)enter x to cash your balance\n')
        elif money == 0:
            a = 'x'
        #case2: user change a bet
        else:
            bet1, bet2, bet3, tbet = functions.bet(money, tbet)
            casino_vault += tbet
            money -= tbet
            functions.show_bet(bet1, bet2, bet3)
            money, casino_vault = functions.roll1(casino_vault, money, bet1,
                                                  bet2, bet3, tbet)
            print('=' * 70)
            print('')
            print('Your balance: ', format(money, ',.2f'), '$', sep='')
            print('casino vault: ', format(casino_vault, ',.2f'), '$', sep='')
            print('')
            a = input('1)Press ENTER to roll again with the same bet\n\
2)Press a for a different bet\n\
3)enter x to cash your balance\n')
    else:
        a = 'x'
functions.ending_chosing(casino_vault, money)
functions.write(casino_vault)
functions.score_board(money)

#call the main function
#How to prevent bad input like letters?
#improve the prize mechanism
Esempio n. 14
0
# Command-line arguments and the directory that contains this script.
arg = sys.argv
directory = os.path.dirname(os.path.realpath(__file__))

# The tool counts as initialised when the first line of .config.txt is
# exactly "init". The file is closed right after reading, and an empty file
# yields False instead of raising IndexError.
with open(f"{directory}/.config.txt", "r") as config:
    status = config.readline() == "init"

if len(arg) == 1:
    print("\033[1;33mOlá! Bem-vindo ao Header!\033[0;0m")
    print("\033[1;32mStatus do Header: iniciado\033[0;0m" if status ==
          True else "\033[1;31mStatus do Header: não iniciado\033[0;0m")
    print("\033[1;32mPara obter ajuda no uso da ferramenta, digite:\033[0;0m")
    print("python3 header.py help")
else:
    arg.pop(0)

if status and arg[0].lower() == "w":
    print(functions.write(arg))
elif status and arg[0].lower() == "ws":
    print(functions.write_save(arg, directory))
elif status and arg[0].lower() == "s":
    print(functions.save(arg, directory))
elif status and arg[0].lower() == "c":
    print(functions.create(arg, directory))
elif not status and arg[0].lower() == "init":
    print(functions.init(directory))
elif status and arg[0].lower() == "init":
    print("Seu Header já foi iniciado!")
elif status and arg[0].lower() == "list":
    functions.list_ws(directory)
elif status and arg[0].lower() == "e":
    print(functions.edit(arg, directory))
elif status and arg[0].lower() == "d":
Esempio n. 15
0
s_area=f.square_area(3,4)
print(s_area)

###############FILE HANDLING###############


import os

# Delete aa.txt when it is present; otherwise report that it is missing.
if os.path.exists("C://python notes//aa.txt"):
    os.remove("C://python notes\\aa.txt")
else:
    print("file does not exist")

###########
# Create (or truncate) a.txt and write the first sentence.
with open("C://python notes\\a.txt", "w") as f:
    f.write("adding some data to the file. ")

# Read back the first line.
with open("C://python notes//a.txt", "r") as f:
    print(f.readline())

# Append more text; leaving the ``with`` block closes the handle, so the
# text is flushed to disk before the file is read again below.
with open("C://python notes//a.txt", "a") as f:
    f.write("Adding some more data!\n yes.")

with open("C://python notes//a.txt", "r") as f:
    print(f.readline())

import os
# os.rmdir removes only empty directories and raises OSError otherwise.
os.rmdir("C://python notes//AA")

import os
Esempio n. 16
0
        print(ans)
        break

    # get game info
    homeTeamId = read(path + 'homeTeamId.txt')
    awayTeamId = read(path + 'awayTeamId.txt')
    if gameId != read(path + 'gameId.txt'):
        gameId = read(path + 'gameId.txt')
        print('Current game: {}'.format(read(path + 'gameName.txt')))

    # get time remaining
    try:
        clockRunning = data[gameId]['clockRunning']
        timeRemaining = data[gameId]['timeRemaining']
        # write time remaining to file
        write(format_timeRemaining(timeRemaining), path + 'time.txt')
    except (KeyError, NameError):
        pass

    # get score
    try:
        write(data[gameId]['currentTeamScore'][homeTeamId],
              path + 'homeScore.txt')
    except (KeyError, NameError):
        pass
    try:
        write(data[gameId]['currentTeamScore'][awayTeamId],
              path + 'awayScore.txt')
    except (KeyError, NameError):
        pass
def scan_profile(input_file,
                 pwmscan_dir,
                 output_dir="./",
                 background=(0.25, 0.25, 0.25, 0.25),
                 p_value_thresh=0.05,
                 rel_score_thresh=0.8,
                 dummy_dir="/tmp/",
                 profile=None):
    """Scan a DNA sequence file for matches to ``profile`` with PWMScan.

    The profile is written in PWMScan format, ``matrix_prob`` is run to get
    the score distribution and derive a score cutoff, and ``matrix_scan``
    output is piped through awk and gzip into ``<matrix_id>.bed.gz`` inside
    ``output_dir``. Temporary files are removed afterwards, also on failure.

    Args:
        input_file: Path to the DNA sequence file to scan.
        pwmscan_dir: Directory containing the ``matrix_prob`` and
            ``matrix_scan`` executables.
        output_dir: Directory that receives the gzipped BED file.
        background: Background frequencies in A, C, G, T order.
        p_value_thresh: A score qualifies as cutoff only if its p-value is
            below this threshold.
        rel_score_thresh: A score qualifies as cutoff only if its
            percentage is at least this fraction times 100.
        dummy_dir: Directory for temporary files.
        profile: Profile object to scan with. Although it defaults to
            ``None`` it is dereferenced immediately, so it is required.

    Raises:
        ValueError: If the score distribution cannot be calculated, if no
            score satisfies both thresholds, or if the scan command cannot
            be run.
    """

    # Initialize #
    prefix = "%s.%s.%s" % (os.path.basename(__file__), os.getpid(),
                           profile.matrix_id)
    bed_file = os.path.join(dummy_dir, prefix + ".bed")
    pwm_file = os.path.join(dummy_dir, prefix + ".pwm")
    tsv_file = os.path.join(dummy_dir, prefix + ".tsv")
    output_file = os.path.join(output_dir, "%s.bed.gz" % profile.matrix_id)
    cutoff = None

    try:
        # Add background #
        profile.background = {
            "A": background[0],
            "C": background[1],
            "G": background[2],
            "T": background[3]
        }
        # Add JASPAR pseudocounts #
        profile.pseudocounts = motifs.jaspar.calculate_pseudocounts(profile)
        # Convert to PWMScan format #
        for i in range(len(profile.pssm["A"])):
            functions.write(
                pwm_file,
                "\t".join([str(int(profile.pssm[j][i] * 100)) for j in "ACGT"]))
        # Calculate distribution of matrix scores #
        try:
            # universal_newlines=True makes check_output return text, so the
            # output can be split on "\n" under Python 3 as well.
            distribution = subprocess.check_output(
                [os.path.join(pwmscan_dir, "matrix_prob"), pwm_file],
                stderr=subprocess.STDOUT,
                universal_newlines=True)
            for line in distribution.split("\n"):
                m = re.search(r"(\S+)\s+(\S+)\s+(\S+)%", line)
                if m:
                    score = m.group(1)
                    p_value = float(m.group(2))
                    perc = float(m.group(3))
                    functions.write(
                        tsv_file, "%s\t%s\t%s" %
                        (score, int(perc * 10), int(log(p_value) * 1000 / -10)))
                    # Use the thresholds given to this function; the last
                    # qualifying score becomes the cutoff.
                    if p_value < p_value_thresh and perc >= rel_score_thresh * 100:
                        cutoff = score
        except Exception:
            raise ValueError("Could not calculate distribution of matrix scores!")
        if cutoff is None:
            raise ValueError(
                "No matrix score satisfies the p-value and relative score thresholds!")
        # Scan DNA sequence for TFBS matches #
        try:
            # NOTE(review): the arguments are interpolated unquoted into a
            # shell command line; paths with spaces or shell metacharacters
            # will break it.
            bash_command = '''%s -m %s -c %s %s | awk -v score_tab="%s" -v name="%s" 'BEGIN { while((getline line < score_tab) > 0 ) {split(line,f," "); scores[f[1]]=f[2]; pvalues[f[1]]=f[3]} close(score_tab) } {print $1"\t"$2"\t"$3"\t"name"\t"scores[$5]"\t"pvalues[$5]"\t"$6}' | gzip > %s''' % (
                os.path.join(pwmscan_dir, "matrix_scan"), pwm_file, cutoff,
                os.path.abspath(input_file), tsv_file, profile.name, bed_file)
            subprocess.call(bash_command,
                            shell=True,
                            stderr=subprocess.STDOUT)
        except Exception:
            raise ValueError("Could not scan DNA sequence file for TFBS matches!")
        # Write output #
        shutil.copy(bed_file, output_file)
    finally:
        # Remove files #
        for temp_file in (bed_file, pwm_file, tsv_file):
            if os.path.exists(temp_file):
                os.remove(temp_file)
Esempio n. 18
0
 if options.format == "csv": delimiter = ","
 if options.output_file is None: dummy_file = None
 else: dummy_file = os.path.join(os.path.abspath(options.dummy_dir), "%s.txt" % os.getpid())
 rel_score_thresh = int(options.rel_score_thresh * 1000) # transform relative score threshold
 p_value_thresh = int(log(options.p_value_thresh) * 1000 / -10) # transform p-value threshold
 # Remove dummy file if exist #
 if dummy_file is not None:
     if os.path.exists(dummy_file): os.remove(dummy_file)
 # Write #
 if options.format != "bed":
     header = delimiter.join(["chr", "start (1-based)", "end"])
     if options.scores == "rel_score": header += delimiter + "rel_score * 1000"
     elif options.scores == "p_value": header += delimiter + "p_value"
     else: header += delimiter + "rel_score * 1000" + delimiter + "-1 * log10(p_value) * 100"
     # Write #
     functions.write(dummy_file, header + delimiter + "strand")
 # For each matrix id and for each chr file... #
 for file_name in os.listdir(os.path.abspath(options.input_dir)):
     # Initialize #
     m = re.search("^(MA\d+\.\d)\.(chr\S+)\.tab\.gz$", file_name)
     if not m: continue
     matrix_id = m.group(1)
     chromosome = m.group(2)
     # Skip file if wrong matrix id #
     if options.matrix_id is not None:
         if matrix_id not in options.matrix_id.split(","): continue
     # Skip file if wrong chromosome #
     if options.chr is not None:
         if chromosome not in options.chr.split(","): continue
     # If no profile for matrix id... #
     if matrix_id not in profiles:
Esempio n. 19
0
def make_files(out_dir=os.path.dirname(os.path.realpath(__file__))):
    """Create the JASPAR, UniProt and Cis-BP derived files under ``out_dir``.

    Files that already exist are not rebuilt. For every taxon in the
    module-level ``taxons`` the function writes:

    * ``<taxon>.profiles.json`` - CORE profile names keyed by matrix ID,
      fetched page by page from the JASPAR REST API;
    * ``<taxon>.uniprot.json`` - for each UniProt accession, the list of
      profiles and the sequence returned by ``uniprot.get_fasta_sequence``;
    * ``<taxon>.fa`` - the sequences, plus a BLAST+ protein database built
      with ``makeblastdb``.

    It then writes ``cisbp.json`` from the Cis-BP 1.02 SQL dumps (the
    download directory is removed afterwards), and finally ``domains.json``
    and ``jaspar.json`` for the accessions whose sequence digest is present
    in ``cisbp.json``.

    Args:
        out_dir: Output directory, created when missing. Defaults to the
            directory containing this module.

    Raises:
        ValueError: If fetching from JASPAR or UniProt fails, or if the
            BLAST+ database cannot be created.
    """

    # Initialize
    cwd = os.getcwd()
    codec = coreapi.codecs.CoreJSONCodec()
    uniprot = UniProt(verbose=False, cache=False)
    # UniProt accessions forced for specific matrix IDs ("bugged cases").
    uniprot_fixes = {
        "MA0328.1": ["P0CY08"],
        "MA0110.1": ["P46667"],
        "MA0058.1": ["P61244"],
        "MA0046.1": ["P20823"],
        "MA0098.1": ["P14921"],
        "MA0052.1": ["Q02078"],
        "MA0024.1": ["Q01094"],
        "MA0138.1": ["Q13127"],
    }

    def _sequence_key(sequence):
        # MD5 hex digest of the sequence followed by its first and last
        # four characters.
        h = hashlib.new("md5")
        h.update(sequence.encode("utf-8"))
        return h.hexdigest() + sequence[:4] + sequence[-4:]

    def _add_core_profiles(profiles, results):
        # Record the name of every CORE collection profile on one page.
        for profile in results:
            if profile["collection"] == "CORE":
                profiles.setdefault(profile["matrix_id"], profile["name"])

    # Create output dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # For each taxon...
    for taxon in taxons:
        # Skip if taxon profiles JSON file already exists
        profiles_json_file = os.path.join(
            out_dir, "%s.profiles.json" % taxon)
        if not os.path.exists(profiles_json_file):
            try:
                # Initialize
                profiles = {}
                client = coreapi.Client()
                response = client.get(
                    "http://jaspar.genereg.net/api/v1/taxon/%s/" % taxon)
                json_obj = json.loads(codec.encode(response))
                # Process every page, following "next" until the last one
                while True:
                    _add_core_profiles(profiles, json_obj["results"])
                    if json_obj["next"] is None:
                        break
                    # Go to next page
                    response = client.get(json_obj["next"])
                    json_obj = json.loads(codec.encode(response))
                # Write
                functions.write(profiles_json_file, json.dumps(
                    profiles, sort_keys=True, indent=4, separators=(",", ": ")))
            except Exception:
                raise ValueError("Could not fetch %s profiles from JASPAR" % taxon)

        # Skip if taxon uniprot JSON file already exists
        uniprot_json_file = os.path.join(out_dir, "%s.uniprot.json" % taxon)
        if not os.path.exists(uniprot_json_file):
            try:
                # Initialize
                uniaccs = {}
                client = coreapi.Client()
                # Load JSON file
                with open(profiles_json_file) as f:
                    profiles = json.load(f)
                # For each profile...
                for profile in sorted(profiles):
                    # Get profile detailed info
                    response = client.get(
                        "http://jaspar.genereg.net/api/v1/matrix/%s/" % profile)
                    json_obj = json.loads(codec.encode(response))
                    # Fix bugged cases
                    if json_obj["matrix_id"] in uniprot_fixes:
                        json_obj["uniprot_ids"] = list(
                            uniprot_fixes[json_obj["matrix_id"]])
                    # For each UniProt Accession...
                    for uniacc in json_obj["uniprot_ids"]:
                        # Strip
                        uniacc = uniacc.strip(" ")
                        # Add uniacc
                        uniaccs.setdefault(uniacc, [[], None])
                        if profile not in uniaccs[uniacc][0]:
                            uniaccs[uniacc][0].append(profile)
                # For each UniProt Accession...
                for uniacc in uniaccs:
                    # Get UniProt sequence
                    uniaccs[uniacc][1] = uniprot.get_fasta_sequence(uniacc)
                # Write
                functions.write(uniprot_json_file, json.dumps(
                    uniaccs, sort_keys=True, indent=4, separators=(",", ": ")))
            except Exception:
                raise ValueError("Could not fetch %s sequences from UniProt" % taxon)

        # Skip if taxon FASTA file already exists
        fasta_file = os.path.join(out_dir, "%s.fa" % taxon)
        if not os.path.exists(fasta_file):
            # Load JSON file
            with open(uniprot_json_file) as f:
                uniaccs = json.load(f)
            # For each UniProt Accession...
            # NOTE(review): an accession without a sequence (None) is written
            # here as the literal text "None"; the domains step below skips
            # such entries - confirm whether they should be skipped here too.
            for uniacc in sorted(uniaccs):
                # Write
                functions.write(fasta_file,
                    ">%s\n%s" % (uniacc, uniaccs[uniacc][1]))
            # Create BLAST+ db
            try:
                subprocess.check_output([
                    "makeblastdb",
                    "-in", fasta_file,
                    "-dbtype", "prot"],
                    stderr=subprocess.STDOUT)
            except Exception:
                raise ValueError("Could not create BLAST+ database: %s" % fasta_file)

    # Skip if Cis-BP JSON file already exists
    cisbp_json_file = os.path.join(out_dir, "cisbp.json")
    if not os.path.exists(cisbp_json_file):
        # Initialize
        proteins = {}
        prot_features = {}
        tfs = {}
        tf_families = {}
        # Create Cis-BP dir
        cisbp_dir = os.path.join(out_dir, "cisbp")
        if not os.path.exists(cisbp_dir):
            os.makedirs(cisbp_dir)
        # Change dir
        os.chdir(cisbp_dir)
        try:
            # Skip if TFs file already exists
            if not os.path.exists("cisbp_1.02.tfs.sql"):
                # Download SQL files
                os.system("curl --silent -O http://cisbp.ccbr.utoronto.ca/data/1.02/DataFiles/SQLDumps/SQLArchive_cisbp_1.02.zip")
                # Unzip
                os.system("unzip -qq SQLArchive_cisbp_1.02.zip")
                # Remove SQL files
                os.remove("SQLArchive_cisbp_1.02.zip")
                # For each ZIP file...
                for zip_file in frozenset(os.listdir(os.getcwd())):
                    # Skip non-zip files
                    if not zip_file.endswith(".zip"):
                        continue
                    # Unzip
                    os.system("unzip -qq %s" % zip_file)
                    os.remove(zip_file)
        finally:
            # Return to original dir, also when the download step fails
            os.chdir(cwd)
        # Get protein features
        with open(os.path.join(cisbp_dir, "cisbp_1.02.prot_features.sql")) as f:
            # For each line...
            for line in f:
                m = re.search(r"\('.+', '(.+)', '.+', \d+, \d+, '(.+)'\)", line)
                if m:
                    prot_features.setdefault(m.group(1), set())
                    prot_features[m.group(1)].add(m.group(2))
        # Get TFs
        with open(os.path.join(cisbp_dir, "cisbp_1.02.tfs.sql")) as f:
            # For each line...
            for line in f:
                m = re.search(r"\('(.+)', '(.+)', '.+', '.+', '.+', '.+', '.+'\)", line)
                if m:
                    tfs.setdefault(m.group(1), m.group(2))
        # Get TF families
        with open(os.path.join(cisbp_dir, "cisbp_1.02.tf_families.sql")) as f:
            # For each line...
            for line in f:
                m = re.search(r"\('(.+)', '.+', '.+', \d+, (.+)\)", line)
                if m:
                    tf_families.setdefault(m.group(1), m.group(2))
        # Get proteins
        with open(os.path.join(cisbp_dir, "cisbp_1.02.proteins.sql")) as f:
            # For each line...
            for line in f:
                m = re.search(r"\('(.+)', '(.+)', '.+', '.+', '([A-Z]+)\W*'\)", line)
                if m:
                    if m.group(1) not in prot_features:
                        continue
                    # Digest to MD5
                    md5 = _sequence_key(m.group(3))
                    proteins.setdefault(md5, [tf_families[tfs[m.group(2)]], []])
                    # For each domain...
                    for domain in prot_features[m.group(1)]:
                        if domain not in proteins[md5][1]:
                            proteins[md5][1].append(domain)
        # Write
        functions.write(cisbp_json_file, json.dumps(
            proteins, sort_keys=True, indent=4, separators=(",", ": ")))
        # Remove Cis-BP dir
        shutil.rmtree(cisbp_dir)

    # Skip if JSON files already exist
    domains_json_file = os.path.join(out_dir, "domains.json")
    jaspar_json_file = os.path.join(out_dir, "jaspar.json")
    if not os.path.exists(domains_json_file) or not os.path.exists(jaspar_json_file):
        # Initialize
        domains = {}
        jaspar = {}
        # Load JSON file
        with open(cisbp_json_file) as f:
            cisbp = json.load(f)
        # Remove JSON files: when only one of the two exists it is stale and
        # must be deleted before both are written again.
        for stale_file in (domains_json_file, jaspar_json_file):
            if os.path.exists(stale_file):
                os.remove(stale_file)
        # For each taxon...
        for taxon in taxons:
            # Load JSON files
            profiles_json_file = os.path.join(
                out_dir, "%s.profiles.json" % taxon)
            with open(profiles_json_file) as f:
                profiles = json.load(f)
            uniprot_json_file = os.path.join(
                out_dir, "%s.uniprot.json" % taxon)
            with open(uniprot_json_file) as f:
                uniaccs = json.load(f)
            # For each UniProt Accession...
            for uniacc in sorted(uniaccs):
                # Skip if no sequence
                if uniaccs[uniacc][1] is None:
                    continue
                # Digest to MD5
                md5 = _sequence_key(uniaccs[uniacc][1])
                # If sequence in Cis-BP...
                if md5 in cisbp:
                    # Add to domains
                    domains.setdefault(uniacc, [cisbp[md5][1], cisbp[md5][0]])
                    # For each profile...
                    for profile in uniaccs[uniacc][0]:
                        # Add to JASPAR
                        jaspar.setdefault(uniacc, [])
                        jaspar[uniacc].append([profile, profiles[profile]])
        # Write
        functions.write(domains_json_file, json.dumps(
            domains, sort_keys=True, indent=4, separators=(",", ": ")))
        functions.write(jaspar_json_file, json.dumps(
            jaspar, sort_keys=True, indent=4, separators=(",", ": ")))
Esempio n. 20
0
                grams = []
                for j in ngrams(tokens, 2):
                    grams.append(' '.join([x for x in j]))

                for gram in grams:
                    if gram == i:
                        idx = grams.index(gram)
                        if idx < len(tags) - 2:
                            if tags[idx + 2][1] in ['VB', 'VBP']:
                                id_date.append(
                                    (item['id_str'], item['created_at']))

                                res.append(item['user']['screen_name'] + '\t' +
                                           item['id_str'] + '\t' + text +
                                           '\t' + item['created_at'])
    instance_counter += len(id_date)
    if len(id_date) > 0:
        file_name = file_name.split('.', -1)[0]
        result[file_name] = id_date
    sys.stdout.write('\r%d/%d' % (counter, num_of_files))

print("\nnumber of tweets;", tweets_counter)
print("found instaces: ", instance_counter)
print("final value:", round(instance_counter / tweets_counter, 4))

if save:
    functions.write_json(result,
                         output_dir + 'lexico-grammar(' + verb[0] + ').json')
    functions.write(res, output_dir + 'lexico-grammar(' + verb[0] + ').txt')
Esempio n. 21
0
def main():
    """Run one interactive slot-machine session.

    The casino vault is loaded with ``functions.read()`` and the
    instructions are shown. The player starts with a balance of 1,000,000
    and is asked for a multiplier, which is passed through
    ``functions.valid2``.

    The game then loops until the player answers ``x``, the balance is
    exactly 0 when a roll is no longer affordable, or the vault is no longer
    positive. After the loop the ending is shown via
    ``functions.ending_chosing``, the vault is stored with
    ``functions.write`` and the final balance is handed to
    ``functions.score_board``.
    """
    # Menu shown after every roll.
    roll_menu = ('1)Press ENTER to roll again\n'
                 '2)enter new multiplier\n'
                 '3)enter x to cash your balance\n')
    # Menu shown when the balance cannot cover the current multiplier.
    short_menu = ('You do not have enough money for a roll\n'
                  '1)enter a new multiplier\n'
                  '2)enter x to exit\n')

    casino_vault = functions.read()
    functions.instruction_slot(casino_vault)

    # Starting balance and the fallback multiplier handed to valid2.
    money = 1000000
    default_multiplier = 1
    multiplier = functions.valid2(
        input('Enter your multiplier(DEFAULT 1):\n'),
        default_multiplier, money)

    # `choice` is the player's latest answer and drives the loop:
    #   ''  -> roll again with the current multiplier
    #   'x' -> stop playing
    #   anything else -> treat the answer as a new multiplier, then roll
    choice = ''
    while choice != 'x':
        # The game ends as soon as the vault is no longer positive.
        if not casino_vault > 0:
            choice = 'x'
            continue

        if choice != '':
            # The player typed a new multiplier instead of pressing ENTER.
            multiplier = functions.valid2(choice, default_multiplier, money)
        elif not money >= multiplier:
            # The balance cannot cover the current multiplier.
            if money == 0:
                # Nothing left to play with: end the session.
                choice = 'x'
            else:
                answer = input(short_menu)
                if answer == 'x':
                    # The player chose to leave.
                    choice = 'x'
                else:
                    # Any other answer is taken as a new multiplier; the
                    # next iteration re-checks whether it is affordable.
                    multiplier = functions.valid2(
                        answer, default_multiplier, money)
            continue

        # Play one round: the stake moves from the player to the vault, then
        # the prize moves from the vault back to the player.
        money -= multiplier
        casino_vault += multiplier
        print('==========================')
        prize = functions.roll2(multiplier, casino_vault)
        money += prize
        casino_vault -= prize
        print('')
        print('Your balance: ', format(money, ',.2f'), '$', sep='')
        print('')
        choice = input(roll_menu)

    functions.ending_chosing(casino_vault, money)
    functions.write(casino_vault)
    functions.score_board(money)
Esempio n. 22
0
# 8. Show end time (wall-clock time in the Australia/Sydney timezone)
tz_Sydney = pytz.timezone('Australia/Sydney')
datetime_Sydney_end = datetime.now(tz_Sydney)
print("\nYou finished at: ", datetime_Sydney_end.strftime("%H:%M:%S"))

# 9. Calculate and show total time spent in program
# `start_time` is set earlier in the script; the elapsed time is converted
# to minutes, rounded to two decimals.
end_time = datetime.now()
duration = end_time - start_time
duration_in_seconds = duration.total_seconds()
minutes = round(duration_in_seconds / 60, 2)

# Log in time in time_log file
# `datetime_Sydney` comes from earlier in the script; it is stringified once
# and appended on its own line. The `with` block closes the file even if the
# write fails.
datetime_Sydney = str(datetime_Sydney)
with open("time_log", "a") as f:
    f.write(f"\n{datetime_Sydney}")

# 10. Log goal in goal_log file
today = date.today()
with open("goal_log", "a") as f:
    f.write(f"\n {today}: {today_goal.capitalize()}")

# 11. Print time spent in program
print(f"\nYou spent {minutes} minutes logged on today.\n")

# 12. Exit message
print(
    f"Remember your goal for today: {today_goal.capitalize()}. Have a great day!"
Esempio n. 23
0
            bathrooms = 2
            garages = 2
            #UV = "$499,000"
            #EER = "2.3"
            #blocksize = 788
            housesize = 50
            #------------- 

            new_line = new_feature.format(node,propertytype,name,url,address,price,bedrooms,bathrooms,garages,UV,EER,blocksize,housesize,timestamp,longitude,latitude)

            # if a == len(address_list)-1:
            #    output = output + new_line # if the feature is the last don't add a comma
            # else:
            output = output + new_line + "\n,\n" 


        print '-------------------'
# Every feature above is appended followed by a "\n,\n" separator, so the
# accumulated text ends with a dangling comma after the last feature. Drop
# that final separator (keeping the newline) before closing the document;
# a trailing comma is not valid JSON.
feature_separator = "\n,\n"
if output.endswith(feature_separator):
    output = output[:-len(feature_separator)] + "\n"

output = output + postamble

# write geojson output to file; the `with` block closes the file even if the
# write fails
with open("addresses.geojson", "w+") as f:
    f.write(output)
    t = []
    for item in data:
        if len(item['entities']['hashtags']) != 0:
            for h in item['entities']['hashtags']:
                if h['text'].lower() in pattern:
                    t.append((item['id_str'], item['created_at'],
                              h['text'].lower()))
                    hashtag_counter += 1

                    text = item['full_text']
                    text = re.sub(r"http\S+", " ", text)
                    text = re.sub(r"\s+", " ", text)
                    res.append(item['user']['screen_name'] + '\t' +
                               item['id_str'] + '\t' + text + '\t' +
                               item['created_at'])

    if len(t) > 0:

        file_name = file_name.split('.', -1)[0]
        result[file_name] = t
    sys.stdout.write('\r%d/%d' % (counter, num_of_files))

# Summary of the run: tweets scanned, matching hashtags found, and the share
# of hashtags per tweet.
print("\nnumber of tweets;", tweets_counter)
print("found hashtags: ", hashtag_counter)
# Guard the ratio: with an empty corpus (tweets_counter == 0) the division
# would raise ZeroDivisionError, so report 0.0 instead.
final_value = (round(hashtag_counter / tweets_counter, 5)
               if tweets_counter else 0.0)
print("final value:", final_value)

# Only persist the results when saving was requested: `result` goes through
# functions.write_json and `res` through functions.write, both named after
# the first entry of `pattern`.
if save:
    functions.write_json(result, output_dir + 'lexis(' + pattern[0] + ').json')
    functions.write(res, output_dir + 'lexis(' + pattern[0] + ').txt')
Esempio n. 25
0
# Fill in the subscribe request. `subscribe` is created earlier in the
# script; the API key and event id are obtained through read() from
# 'apiKey.txt' and 'eventId.txt'.
subscribe['apiCommand'] = "subscribe"
subscribe['apiKey'] = read('apiKey.txt')
subscribe['requestId'] = "test connection"
subscribe['eventId'] = read('eventId.txt')

# setup WebSocket
ws = websocket.WebSocket()

# start ws connection (`url` is defined earlier in the script)
ws.connect(url)

# subscribe to feed: the request is sent as a JSON-encoded string
ws.send(json.dumps(subscribe))

# initialize files: placeholder team names are written under `path`, then
# reset_score() is called before the main loop starts
write('Home', path + 'homeTeamName.txt')
write('Away', path + 'awayTeamName.txt')
reset_score()

# main loop
while (True):
    # get data
    ans = ws.recv()
    try:
        data = json.loads(ans)['data']
    except KeyError:
        print(ans)
        break

    # get team info
    try:
Esempio n. 26
0
def save_to_file(file_name, time, data):
    """Append one ``time<TAB>data`` line to a text file in ``c.SAVE_LOCATION``.

    Args:
        file_name: Name of the target file; ``.txt`` is appended when the
            name does not already end with it.
        time: Value written in the first column.
        data: Value written in the second column.
    """
    target_name = file_name if file_name.endswith(".txt") else file_name + ".txt"

    # The directory prefix is joined by plain concatenation, so
    # c.SAVE_LOCATION is used exactly as given.
    with open(c.SAVE_LOCATION + target_name, "a") as out:
        out.write("{}\t{}\n".format(time, data))
Esempio n. 27
0
def save_to_file(file_name, time, data):
    """Append one ``time<TAB>data`` line to a text file in ``c.SAVE_LOCATION``.

    Args:
        file_name: Name of the target file; ``.txt`` is appended when the
            name does not already end with it.
        time: Value written in the first column.
        data: Value written in the second column.
    """
    target_name = file_name if file_name.endswith(".txt") else file_name + ".txt"

    # The directory prefix is joined by plain concatenation, so
    # c.SAVE_LOCATION is used exactly as given.
    with open(c.SAVE_LOCATION + target_name, "a") as out:
        out.write("{}\t{}\n".format(time, data))