def warning(self, msg, *args, **kwargs): MyLogger.warn_tcnt += 1 # Get the message applying optional C style expansions if isinstance(msg, str) and len(args): buf = StringIO() buf.write(msg % args) buf = buf.getvalue() else: buf = str(msg) # Avoid repeated warnings if buf in MyLogger.warn_hash: MyLogger.warn_hash[buf] += 1 return # Apply the filters if filters and buf.startswith('(W'): pos_end = buf.find(')') if pos_end > 0: number = int(buf[2:pos_end]) for f in filters: if f.number == number and f.regex.search(buf): MyLogger.n_filtered += 1 return MyLogger.warn_cnt += 1 MyLogger.warn_hash[buf] = 1 if sys.version_info.major > 3 or (sys.version_info.major == 3 and sys.version_info.minor >= 8): super().warning(buf, stacklevel=2, **kwargs) else: super().warning(buf, **kwargs)
def key_ratio(ticker, instrum):
    """Download Morningstar key-ratio CSV data for *ticker* and split it into
    per-section DataFrames of floats.

    Returns a dict mapping section name -> DataFrame, or the bare *ticker*
    string when Morningstar has no data for it (callers must check for this).

    NOTE(review): *instrum* is accepted but never used in this body —
    confirm whether it can be dropped or is required by callers.
    """
    url = 'http://financials.morningstar.com/ajax/exportKR2CSV.html?t=' + ticker
    r = requests.get(url)
    content = r.content.decode("utf-8")
    # Morningstar returns either an empty body or a fixed apology sentence
    # (with a curly apostrophe) when the ticker is unknown.
    if len(content) == 0 or content == 'We’re sorry. There is no available information in our database to display.':
        return ticker
    # Skip the preamble up to and including the "ls\n" marker so the CSV
    # header is the first line pandas sees.
    content = StringIO(content[content.find('ls\n')+3:])
    data = pd.read_csv(content, sep=',')
    # Forward-fill the section-label column so every row carries its section.
    # NOTE(review): fillna(method='ffill') is deprecated in modern pandas
    # (use .ffill()) — left as-is here to avoid changing behavior.
    data[data.columns[0]].fillna(method='ffill', inplace=True)
    data.set_index(data.columns[0], inplace=True)
    data.index.name = 'Financials'
    data.dropna(how='all', inplace=True)
    # Strip thousands separators so values can be cast to float below.
    data.replace({',': ''}, regex=True, inplace=True)
    # Section-header rows repeat the column label in the data cells; those
    # index labels mark the boundaries between sections.
    new_dataframe = data[data[data.columns[-2]] == data.columns[-2]].index.tolist()
    result = {}
    # One slice per section: before the first boundary, between consecutive
    # boundaries, and after the last one. [:-1] trims the trailing header row
    # captured by the inclusive .loc slice.
    for i in range(len(new_dataframe)+1):
        if i == 0:
            result[data.index.name] = data.loc[:new_dataframe[i], :][:-1].apply(lambda x: x.astype(float))
        elif i == len(new_dataframe):
            temp = data.loc[new_dataframe[-1]:, :]
            # First row of the slice is the section header: promote it to the
            # index name, then drop it.
            temp.index.name = temp.index.tolist()[0]
            temp = temp[1:]
            result[new_dataframe[i-1]] = temp.apply(lambda x: x.astype(float))
        else:
            temp = data.loc[new_dataframe[i-1]:new_dataframe[i], :][:-1]
            temp.index.name = temp.index.tolist()[0]
            temp = temp[1:]
            result[new_dataframe[i-1]] = temp.apply(lambda x: x.astype(float))
    return result
def search(path, query):
    """Print every occurrence of *query* in the FASTA file at *path*.

    Concatenates all sequence lines (skipping '>' description lines),
    upper-cases the result, and prints one "start:end" pair per match,
    including overlapping matches. On an I/O error the OS message and the
    path are printed instead.

    :param path: filesystem path of the FASTA file.
    :param query: subsequence to look for (compared against the
        upper-cased reference, so pass it upper-cased for matches).
    """
    reference = StringIO()
    try:
        # Open inside the try so a missing/unreadable file is reported by
        # the OSError handler instead of propagating (the original opened
        # outside the try, making the handler unreachable for open errors).
        with io.open(path, 'r') as file:
            for line in file:
                line = line.strip()
                # startswith is safe on blank lines; the original's
                # line[0] raised IndexError on an empty line.
                if not line.startswith('>'):
                    reference.write(line)
        reference = reference.getvalue().upper()
        # Scan with find(); stepping start+1 reports overlapping matches.
        start = reference.find(query)
        while start >= 0:
            end = start + len(query)
            print('{}:{}'.format(start, end))
            start = reference.find(query, start + 1)
    except OSError as e:
        print('{} {}'.format(e.strerror, path))
def grabPortalData(pointCancerUrl, cnaCancerUrl, uniqueGenes, instance, cancer_study, run_study):
    """Fetch point-mutation and CNA data from the cBioPortal-style endpoints
    in batches of 100 genes and append the parsed frames to *instance*.

    :param pointCancerUrl: base URL for point-mutation queries; the
        comma-joined gene batch is appended to it.
    :param cnaCancerUrl: base URL for copy-number-alteration queries.
    :param uniqueGenes: pandas Series of gene symbols to query.
    :param instance: collector object exposing appendPointDF/appendCnaDF.
    :param cancer_study: study identifier written into each frame.
    :param run_study: directory name used for the error log.

    Failures for a batch are logged to a per-run error file and the batch
    is skipped; processing continues with the next batch (best-effort).
    """
    errorPath = '/Users/Rohil/Documents/Young Dawgs/' + run_study + '/grabPortalDataErrors.txt'
    for i in range(0, uniqueGenes.size, 100):
        # Slice i:i + 100 — the original used i:i + 99 with a step of 100,
        # which silently dropped every 100th gene from each batch.
        element = ','.join(uniqueGenes.iloc[i:i + 100].tolist())
        try:
            # Build the URL (stripping any whitespace), fetch, and decode.
            url = "".join((pointCancerUrl + str(element)).split())
            pointPortalData = urlopen(url).read().decode("utf-8")
            # The payload starts at the 'entrez_gene_id' header line; feed
            # everything from there into read_csv via a StringIO.
            pointDF = pd.read_csv(
                StringIO(pointPortalData[pointPortalData.find('entrez_gene_id'):]),
                sep='\t',
                error_bad_lines=False)
            # Keep only the columns of interest (positional drop of the rest).
            pointDF.drop(pointDF.columns[[
                0, 3, 4, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
            ]], axis=1, inplace=True)
            pointDF.rename(columns={'case_id': 'sample_id'}, inplace=True)
            pointDF['mutation_type'] = pointDF['mutation_type'].str.lower()
            pointDF['study_id'] = cancer_study
            pointDF['pointORcna'] = 'point'
            pointDF['ontology'] = pointDF['gene_symbol'].map(ontologyDict)
            instance.appendPointDF(pointDF)
        except Exception:
            # Best-effort batch: record the failure and move on. `except
            # Exception` (not bare except) so Ctrl-C still interrupts.
            ensureDirectory(errorPath)
            # `with` closes the log file; the original leaked the handle.
            with open(errorPath, 'a') as errorFile:
                errorFile.write(cancer_study + ' returned no point mutation data for ' + element)
        try:
            url = "".join((cnaCancerUrl + str(element)).split())
            cnaPortalData = urlopen(url).read().decode("utf-8")
            cnaDF = pd.read_csv(
                StringIO(cnaPortalData[cnaPortalData.find('GENE_ID'):]),
                sep='\t',
                error_bad_lines=False)
            cnaDF.drop('GENE_ID', axis=1, inplace=True)
            cnaDF.rename(columns={'COMMON': 'gene_symbol'}, inplace=True)
            # Wide (one column per sample) -> long (one row per gene/sample).
            cnaDF = cnaDF.melt('gene_symbol',
                               var_name='sample_id',
                               value_name='mutation_type')
            # GISTIC-style codes -> readable labels; unmapped codes (0, NaN)
            # become NaN and are dropped just below.
            cnaDF['mutation_type'] = cnaDF['mutation_type'].map({
                -2.0: 'homozygous_del',
                -1.0: 'hemizygous_del',
                1.0: 'gain',
                2.0: 'high_lvl_amplification'
            })
            cnaDF.dropna(axis=0, inplace=True)
            cnaDF['pointORcna'] = 'cna'
            cnaDF['study_id'] = cancer_study
            cnaDF['ontology'] = cnaDF['gene_symbol'].map(ontologyDict)
            instance.appendCnaDF(cnaDF)
        except Exception:
            ensureDirectory(errorPath)
            # The original wrote this log to '/Users/Rohil/Documents/...'
            # while ensureDirectory prepared '.../Young Dawgs/...' — the
            # shared errorPath fixes that inconsistency.
            with open(errorPath, 'a') as errorFile:
                errorFile.write(cancer_study + ' returned no cna mutation data for ' + element)