Ejemplo n.º 1
0
 def warning(self, msg, *args, **kwargs):
     """Log a warning once, honoring the configured warning filters.

     The message is built from ``msg`` and, when ``msg`` is a string and
     positional arguments are given, C-style ``%`` expansion of ``args``.

     Bookkeeping kept on ``MyLogger``:
       * ``warn_tcnt``  -- incremented on every call.
       * ``warn_hash``  -- maps each emitted message to its occurrence count;
         a message already present is only counted, not logged again.
       * ``n_filtered`` -- incremented when a filter suppresses the message.
       * ``warn_cnt``   -- incremented for every message actually logged.

     ``filters`` is a name defined outside this method; each entry is used
     through its ``number`` and ``regex`` attributes.

     Extra keyword arguments are forwarded to the parent ``warning()``.
     """
     MyLogger.warn_tcnt += 1
     # Get the message applying optional C style expansions
     if isinstance(msg, str) and args:
         buf = msg % args
     else:
         buf = str(msg)
     # Avoid repeated warnings
     if buf in MyLogger.warn_hash:
         MyLogger.warn_hash[buf] += 1
         return
     # Apply the filters (only to messages tagged like "(W<number>) ...")
     if filters and buf.startswith('(W'):
         pos_end = buf.find(')')
         if pos_end > 0:
             try:
                 number = int(buf[2:pos_end])
             except ValueError:
                 # Tag is not numeric (e.g. "(Warning)"): no filter can
                 # match it, so log it normally instead of crashing.
                 number = None
             if number is not None and any(
                     f.number == number and f.regex.search(buf)
                     for f in filters):
                 MyLogger.n_filtered += 1
                 return
     MyLogger.warn_cnt += 1
     MyLogger.warn_hash[buf] = 1
     if sys.version_info >= (3, 8):
         # stacklevel exists since Python 3.8; 2 attributes the record to
         # the caller of this wrapper instead of to this method.
         super().warning(buf, stacklevel=2, **kwargs)
     else:
         super().warning(buf, **kwargs)
Ejemplo n.º 2
0
def key_ratio(ticker,instrum):
    """Download the Morningstar key-ratio CSV for *ticker*, split by section.

    Parameters:
        ticker: symbol appended to the export URL.
        instrum: accepted for interface compatibility; not used here.

    Returns:
        ``ticker`` itself when the response is empty or is the "no available
        information" message. Otherwise a dict of float DataFrames: the key
        ``'Financials'`` holds the rows before the first section header row,
        and each further key is a section header label holding that section's
        rows.

    Raises:
        IndexError: if the parsed table contains no section header rows.
    """
    url = 'http://financials.morningstar.com/ajax/exportKR2CSV.html?t=' + ticker
    r = requests.get(url)
    content = r.content.decode("utf-8")
    if not content or content == 'We’re sorry. There is no available information in our database to display.':
        return ticker
    # Parsing starts right after the first 'ls\n' in the payload
    # (presumably the end of a title line -- confirm against a real export).
    content = StringIO(content[content.find('ls\n') + 3:])

    data = pd.read_csv(content, sep=',')
    label_col = data.columns[0]
    # Assign the forward-filled labels back instead of calling
    # fillna(..., inplace=True) on a column selection: that chained in-place
    # form is deprecated and does not update `data` under copy-on-write.
    data[label_col] = data[label_col].ffill()
    data.set_index(label_col, inplace=True)
    data.index.name = 'Financials'
    data.dropna(how='all', inplace=True)
    # Strip thousands separators so the values can be cast to float.
    data.replace({',': ''}, regex=True, inplace=True)

    # Rows whose value in the second-to-last column equals that column's
    # name are repeated header rows; their index labels name the sections.
    marker_col = data.columns[-2]
    section_labels = data[data[marker_col] == marker_col].index.tolist()

    result = {}

    # Everything before the first section header row (header row excluded).
    result[data.index.name] = data.loc[:section_labels[0], :][:-1].apply(
        lambda x: x.astype(float))

    for pos, label in enumerate(section_labels):
        if pos + 1 < len(section_labels):
            # Up to, but not including, the next section's header row.
            temp = data.loc[label:section_labels[pos + 1], :][:-1]
        else:
            # Last section runs to the end of the table.
            temp = data.loc[label:, :]
        temp.index.name = temp.index.tolist()[0]
        temp = temp[1:]  # drop the section's own header row
        result[label] = temp.apply(lambda x: x.astype(float))

    return result
Ejemplo n.º 3
0
def search(path, query):
    """Print every position of *query* in the sequence stored at *path*.

    The file is read line by line; each line is stripped, blank lines and
    lines whose first character is '>' are skipped, and the remaining lines
    are concatenated and upper-cased. ``query`` is matched as given (it is
    not upper-cased). Every occurrence, including overlapping ones, is
    printed as ``start:end`` (0-based, end exclusive).

    If the file cannot be opened or read, the OS error message followed by
    the path is printed and nothing else happens.
    """
    parts = []
    try:
        # The open call is inside the try so that open failures (missing
        # file, permissions) reach the OSError handler as well.
        with io.open(path, 'r') as file:
            for line in file:
                line = line.strip()
                # `line and` guards blank lines, where line[0] would raise.
                if line and line[0] != '>':
                    parts.append(line)
    except OSError as e:
        print('{} {}'.format(e.strerror, path))
        return

    reference = ''.join(parts).upper()
    start = reference.find(query)
    while start >= 0:
        end = start + len(query)
        print('{}:{}'.format(start, end))
        # Advance by one so overlapping matches are reported too.
        start = reference.find(query, start + 1)
Ejemplo n.º 4
0
def search(path, query):
    """Print every position of *query* in the sequence stored at *path*.

    The file is read line by line; each line is stripped, blank lines and
    lines whose first character is '>' are skipped, and the remaining lines
    are concatenated and upper-cased. ``query`` is matched as given (it is
    not upper-cased). Every occurrence, including overlapping ones, is
    printed as ``start:end`` (0-based, end exclusive).

    If the file cannot be opened or read, the OS error message followed by
    the path is printed and nothing else happens.
    """
    kept_lines = []
    try:
        # Opening inside the try lets the OSError handler cover a missing or
        # unreadable file, not only errors raised while iterating.
        with io.open(path, 'r') as file:
            for raw in file:
                text = raw.strip()
                if not text:
                    # Blank line: indexing text[0] would raise IndexError.
                    continue
                if text[0] != '>':
                    kept_lines.append(text)
    except OSError as e:
        print('{} {}'.format(e.strerror, path))
        return

    reference = ''.join(kept_lines).upper()
    width = len(query)
    start = reference.find(query)
    while start >= 0:
        print('{}:{}'.format(start, start + width))
        # Step one character forward so overlapping hits are found as well.
        start = reference.find(query, start + 1)
Ejemplo n.º 5
0
def _logPortalError(run_study, message):
    """Append *message* as one line to the error log of *run_study*."""
    errorPath = ('/Users/Rohil/Documents/Young Dawgs/' + run_study +
                 '/grabPortalDataErrors.txt')
    ensureDirectory(errorPath)
    # The context manager closes the file even if the write fails.
    with open(errorPath, 'a') as errorFile:
        errorFile.write(message + '\n')


def grabPortalData(pointCancerUrl, cnaCancerUrl, uniqueGenes, instance,
                   cancer_study, run_study):
    """Fetch point-mutation and CNA data for the genes, 100 genes per request.

    For every batch of gene symbols two URLs are requested: ``pointCancerUrl``
    and ``cnaCancerUrl``, each with the comma-joined batch appended and all
    whitespace removed. The tab-separated responses are parsed, normalized
    and handed to ``instance.appendPointDF`` / ``instance.appendCnaDF``.

    Parameters:
        pointCancerUrl: URL prefix for the point-mutation query.
        cnaCancerUrl: URL prefix for the copy-number query.
        uniqueGenes: object supporting ``.size`` and ``.iloc[...]`` slicing
            whose slices yield gene-symbol strings via ``.tolist()``.
        instance: collector exposing ``appendPointDF`` and ``appendCnaDF``.
        cancer_study: study identifier stored in the ``study_id`` column.
        run_study: name used to build the error-log path.

    Any exception raised while fetching or processing a batch is recorded in
    the run's ``grabPortalDataErrors.txt`` and the loop continues.
    ``ontologyDict`` is a name defined outside this function.
    """
    batchSize = 100

    for i in range(0, uniqueGenes.size, batchSize):

        # Slice the full batch: a stop of i + 99 with a step of 100 would
        # silently skip every 100th gene.
        element = ','.join(uniqueGenes.iloc[i:i + batchSize].tolist())

        try:
            # open the url, read the output, and convert from binary to utf-8
            url = "".join((pointCancerUrl + str(element)).split())
            pointPortalData = urlopen(url).read().decode("utf-8")

            # The header row starts where 'entrez_gene_id' first appears;
            # everything before it is dropped before parsing.
            # NOTE(review): error_bad_lines was deprecated in pandas 1.3 and
            # removed in 2.0 (replaced by on_bad_lines='skip'); update this
            # call when the pandas version in use is confirmed.
            pointDF = pd.read_csv(StringIO(
                pointPortalData[pointPortalData.find('entrez_gene_id'):]),
                                  sep='\t',
                                  error_bad_lines=False)

            # Drop the unused columns by position.
            pointDF.drop(pointDF.columns[[
                0, 3, 4, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
            ]],
                         axis=1,
                         inplace=True)

            pointDF.rename(columns={'case_id': 'sample_id'}, inplace=True)

            pointDF['mutation_type'] = pointDF['mutation_type'].str.lower()

            pointDF['study_id'] = cancer_study

            pointDF['pointORcna'] = 'point'

            pointDF['ontology'] = pointDF['gene_symbol'].map(ontologyDict)

            instance.appendPointDF(pointDF)

        except Exception:
            # Best effort: record the failed batch and keep going.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            _logPortalError(
                run_study,
                cancer_study + ' returned no point mutation data for ' +
                element)

        try:
            url = "".join((cnaCancerUrl + str(element)).split())

            cnaPortalData = urlopen(url).read().decode("utf-8")

            # The header row starts where 'GENE_ID' first appears.
            cnaPortalData = StringIO(
                cnaPortalData[cnaPortalData.find('GENE_ID'):])

            cnaDF = pd.read_csv(cnaPortalData, sep='\t', error_bad_lines=False)

            cnaDF.drop('GENE_ID', axis=1, inplace=True)

            cnaDF.rename(columns={'COMMON': 'gene_symbol'}, inplace=True)

            # Wide (one column per sample) -> long (one row per gene/sample).
            cnaDF = cnaDF.melt('gene_symbol',
                               var_name='sample_id',
                               value_name='mutation_type')
            # Values outside this mapping become NaN and are dropped below.
            cnaDF['mutation_type'] = cnaDF['mutation_type'].map({
                -2.0: 'homozygous_del',
                -1.0: 'hemizygous_del',
                1.0: 'gain',
                2.0: 'high_lvl_amplification',
            })
            cnaDF.dropna(axis=0, inplace=True)

            cnaDF['pointORcna'] = 'cna'
            cnaDF['study_id'] = cancer_study

            cnaDF['ontology'] = cnaDF['gene_symbol'].map(ontologyDict)

            instance.appendCnaDF(cnaDF)

        except Exception:
            # Same log file as the point-mutation branch: the directory
            # ensured and the file opened must be the same path.
            _logPortalError(
                run_study,
                cancer_study + ' returned no cna mutation data for ' +
                element)