Esempio n. 1
0
def log(text, currframe, level='INFO'):
    """Print one log line describing the call site held in *currframe*.

    The line consists of five fields separated by a tab followed by "| ":
    the seconds elapsed since the module-level ``start_time`` (formatted
    with ten decimal places), the base name of the frame's source file,
    the frame's line number, *level* and *text*.

    Args:
        text: Message to print; must be a ``str``.
        currframe: Frame object of the caller, e.g. ``inspect.currentframe()``.
        level: Severity label printed verbatim; must be a ``str``.
    """
    frameinfo = getframeinfo(currframe)
    elapsed = time.time() - start_time
    # Split on either path separator so only the file's base name is shown.
    # The pattern is a raw string: the previous non-raw literal relied on the
    # invalid escape "\/", which newer Python versions warn about.
    source_file = rsplit(r'[\\/]', frameinfo.filename)[-1]
    fields = (f'{elapsed:.10f}', source_file, str(frameinfo.lineno), level, text)
    print("\t| ".join(fields))
Esempio n. 2
0
 def parse(self, text):
     """Cut *text* into pieces and hand each meaningful piece to ``analyze``.

     Newlines are removed first, then the text is split with the
     module-level ``end`` pattern. Pieces that are empty or that are found
     in the module-level ``end_sentence`` collection are ignored.
     """
     flattened = text.replace('\n', '')
     for piece in rsplit(end, flattened):
         # Only non-empty pieces that are not sentence terminators are analysed.
         if piece != '' and piece not in end_sentence:
             self.analyze(piece)
Esempio n. 3
0
 def analyze(self, text):
     """Feed the words of *text* into ``self.dico`` as a chain of successors.

     The text is split on the characters of the module-level ``spaces``.
     Every kept token is lower-cased and registered with
     ``self.dico.add_word``. A token with no preceding word is appended to
     ``self.dico.start_words``; any other token is linked to its
     predecessor through ``self.dico.add_next``.
     """
     previous = None
     for raw in rsplit('[%s]' % spaces, text):
         if raw not in spaces:
             token = raw.lower()
             self.dico.add_word(token)
             if not previous:
                 # No predecessor yet: this token opens the sequence.
                 self.dico.start_words.append(token)
             else:
                 self.dico.add_next(previous, token)
             previous = token
def pretty_space(file, encoding='utf-8'):
    """Rewrite *file* in place with every whitespace run collapsed to one space.

    The file is read line by line, each line is split on whitespace, and
    the non-empty tokens of all lines are written back as a single line
    joined by single spaces (no trailing newline).

    Args:
        file: Path of the text file to normalise.
        encoding: Encoding used for both reading and writing.
    """
    from re import split as rsplit
    tokens = []
    with open(file, 'r', encoding=encoding) as source:
        for line in source:
            # Splitting leaves empty strings at the line edges; drop them.
            tokens.extend(piece for piece in rsplit(r'\s+', line) if piece != '')
    normalised = ' '.join(tokens).strip()
    with open(file, 'w', encoding=encoding) as target:
        target.write(normalised)
Esempio n. 5
0
def _myriad_digits(myriad):
    """Return the thousands/hundreds/tens/ones digits of one myriad group."""
    if not myriad:
        return "0000"
    digits = []
    remainder = myriad
    for unit in '千百十':
        coefficient, found, tail = remainder.partition(unit)
        if found:
            # A unit written without a leading digit stands for one of it.
            digits.append(coefficient or '1')
            remainder = tail
        else:
            digits.append('0')
    # Whatever follows the last unit is the ones digit.
    digits.append(remainder or '0')
    return "".join(digits)


def vanJapans(n):  # second slowest
    """Convert a Japanese numeral string to an ``int``.

    The characters of the module-level ``numbers`` are translated to the
    digits 0-9, the text is split into groups on the characters of the
    module-level ``exponents``, and each group is expanded to its
    thousands/hundreds/tens/ones digits. An empty group counts as 0000.

    Each unit (千, 百, 十) is located by partitioning the group, so a unit
    without a leading digit is read as 1 regardless of which other units
    are present (e.g. 三百十五 is 315 and 千十 is 1010).

    NOTE(review): groups are concatenated in the order they appear, so the
    input is presumably expected to spell out every lower myriad group --
    confirm against the definition of ``exponents``.

    Args:
        n: Numeral string to convert.

    Returns:
        The integer value of the concatenated digit groups.
    """
    arabic = n.translate(str.maketrans(numbers, '0123456789'))
    myriads = rsplit(f'[{exponents}]', arabic)
    joined = "".join(_myriad_digits(myriad) for myriad in myriads)
    # Discard any character that is not a digit before converting.
    return int("".join(filter(str.isdigit, joined)))
 # For every FASTQ file in `files`, parse each read and write one
 # tab-separated row of per-read values to `wo`.
 # NOTE(review): this excerpt is cut off inside the final `wo.write(...)`
 # call, so the remaining row fields are not visible here.
 for fastq_file in files:
     ## get file from list
     fastq_file_path = os.path.join(root, fastq_file)
     
     ## open file
     with gopen(fastq_file_path, 'rt') as f:
         
         ## loop over sequences
         for l in SeqIO.parse(f, "fastq"):
             ## file name
             filename_fastq = fastq_file
             ## barcode
             # last "/"-separated component of `root`
             barcode_arrangement = root.split("/")[-1]
             
             ## information from read description
             # The description is split on " <word>=" separators; the unpacking
             # needs exactly six pieces and raises ValueError otherwise.
             # NOTE(review): the pattern is not a raw string, so "\w" relies on
             # Python passing the unknown escape through unchanged.
             read_id, run_id, sampleid, read, channel, start_time = rsplit(" \w+=", l.description)
             ## time since experiment start
             start_time = datetime.strptime(start_time, "%Y-%m-%dT%XZ")
             # NOTE(review): the constant 3600 s offset is presumably a one-hour
             # time-zone correction -- confirm.
             start_time = str((start_time - exp_start_time).total_seconds() + 3600)
             ## qscore 
             # Each Phred score q is converted to 10**(-q/10), those values are
             # averaged, and the mean is mapped back with -10*log10 (6 decimals).
             mean_qscore_template = mean([ 10**(i*(-0.1)) for i in l.letter_annotations["phred_quality"] ]) # round(mean(l.letter_annotations["phred_quality"]),6)
             mean_qscore_template = str(round((-10)*log10(mean_qscore_template),6))
             ## length
             sequence_length_template = str(len(l.seq))
             
             ## create row in data
             wo.write( "\t".join((
             filename_fastq,
             read_id,
             run_id,
             channel,
Esempio n. 7
0
def sexChange(string: str, translations: dict):
    """Run every piece of *string* through ``translate`` and re-join the result.

    The string is split on runs of non-word characters; because the
    pattern is a capturing group the separators are kept as pieces too, so
    both words and separators are passed to ``translate`` in their
    original order and the outputs are concatenated.
    """
    from re import split as rsplit
    pieces = rsplit(r'(\W+)', string)
    converted = (translate(piece, translations) for piece in pieces)
    return ''.join(converted)
Esempio n. 8
0
def extract_base_url(url):
    """Return *url* truncated after its first three-letter extension.

    An extension is a dot followed by exactly three lowercase letters that
    either ends the URL or is followed by a slash. Everything after the
    first such extension is dropped and slashes are stripped from both
    ends of the result.

    When the URL contains no such extension it is returned with only the
    surrounding slashes stripped, instead of failing on a missing match.

    Args:
        url: URL string to shorten.

    Returns:
        The shortened URL as a string.
    """
    from re import search
    # Raw string so that "\." is a regex escape rather than an invalid
    # string escape.
    pattern = r"(?:\.[a-z]{3}$)|(?:\.[a-z]{3}/)"  # this captures the extension
    match = search(pattern, url)
    if match is None:
        return url.strip("/")
    # Text before the first match plus the match itself is the prefix that
    # ends where the match ends.
    return url[:match.end()].strip("/")
Esempio n. 9
0
def split(string):
    """Return the maximal runs of ASCII letters found in *string*, in order."""
    pieces = rsplit("[^a-zA-Z]+", string)
    # Splitting yields empty strings at the edges; keep only real words.
    return [piece for piece in pieces if piece]
Esempio n. 10
0
                d_j = getDisplacement(r=r_j, r0=r0_j)
                dcf_value = correlation_core(d_i, d_j)
                dcf.append(dcf_value)
        dcf_dict[index_j].append(dcf)
        #print('SIMUPKGS-LAMMPS| check data size: length of dcf of one single time window: '+str(len(dcf)))

from os.path import isfile
from os import remove


# Start from a clean output file: drop any result left by a previous run.
if isfile(dcfFilename):
    remove(dcfFilename)

from re import split as rsplit

# dcfFile format: atom id, initial distance, dcf values
# The context manager closes the file even if averaging or formatting
# raises part-way through the loop.
with open(dcfFilename, 'a+', encoding = 'utf-8') as dcffileTag:
    idist = 0
    for j_index in jatoms:

        dcf_result_dict[j_index] = averageOverWindow(twoDimList = dcf_dict[j_index])
        line = str(j_index) +' '+str(init_dist_list[idist]) + ' ' + str(dcf_result_dict[j_index])+'\n'
        # Break the line at commas and square brackets (the punctuation of a
        # list's string form); raw string so "\[" and "\]" are regex escapes.
        words = rsplit(r',|\[|\]', line)
        for word in words:
            dcffileTag.write(' '+word)
        idist += 1

print('-'*100)
print('SIMUPKGS-LAMMPS| addtional information: if you want to plot correlation function, remember timestep:\n'
     +'                 dt = '+str(step)+' fs, total length of window = '+str(step*window_width)+' fs.')
Esempio n. 11
0
def vanJapans3(string):  # slowest
    """Convert a Japanese numeral string to a string of decimal digits.

    Every numeral character is replaced by an arithmetic fragment
    ("+digit" or "*unit"), the text is split into groups on the characters
    of the module-level ``exponents``, and each group is evaluated. An
    empty group contributes "0000"; every group after the first is
    zero-padded to four digits so that a short lower group keeps its place
    value (e.g. 一万三百 gives "10300").

    SECURITY NOTE: characters that are not in the table are passed through
    unchanged and reach ``eval``. Do not call this on untrusted input.

    Args:
        string: Numeral string to convert.

    Returns:
        The concatenated digit groups as a ``str``.
    """
    # dict(zip(numbers + units, [f"+{i}" for i in range(10)] + ['*1000', '*100', '*10']))
    d = {'零': '+0', '一': '+1', '二': '+2', '三': '+3', '四': '+4', '五': '+5', '六': '+6', '七': '+7', '八': '+8', '九': '+9',
         '千': '*1000', '百': '*100', '十': '*10'}
    expression = "".join(d.get(char, char) for char in string)
    myriads = rsplit(f'[{exponents}]', expression)
    parts = []
    for position, myriad in enumerate(myriads):
        if not myriad:
            parts.append('0000')
            continue
        # Strip the leading operator left by the first numeral of the group.
        value = str(eval(myriad.strip("+*")))
        parts.append(value if position == 0 else value.zfill(4))
    return "".join(parts)