def trade_frame(self, compacted = True, cumulative = True):
    '''
    Return a DataFrame of daily returns for each trade.

    Each row is one trade; each column is a day in the trade (day 0 is the
    entry day).

    Parameters
    ----------
    compacted : bool
        When True and trades run longer than 10 days, the columns beyond day
        10 are averaged into buckets ('11-15', ..., '101-200', and a final
        open-ended 'N+') so the frame stays narrow.
    cumulative : bool
        When True (default) values are the trades' cumulative returns; when
        False they are converted back to daily returns.
    '''
    df = DataFrame(None, index=range(self.count), columns=range(self.max_duration), dtype=float)
    for i, trade in enumerate(self.items):
        df.loc[i] = trade.cumulative
    if not cumulative:
        # Convert cumulative to daily returns: (1 + R_t) / (1 + R_{t-1}) - 1.
        df = ((df + 1).T / (df + 1).T.shift(1)).T - 1
    if compacted and df.shape[1] > 10:
        bucket_bounds = [(11, 15), (16, 20), (21, 30), (31, 50), (51, 100), (101, 200)]
        # .copy() so adding the bucket columns below writes to an independent
        # frame instead of a possible view of `df` (chained-assignment hazard).
        trade_df = df.loc[:, 1:10].copy()
        trade_df.columns = trade_df.columns.astype(str)
        for bounds in bucket_bounds:
            if df.shape[1] <= bounds[1]:
                # All remaining days fit in this bucket; label it open-ended.
                label = '{}+'.format(bounds[0])
                trade_df[label] = df.loc[:, bounds[0]:].mean(axis=1)
                break
            else:
                label = '{}-{}'.format(*bounds)
                trade_df[label] = df.loc[:, bounds[0]:bounds[1]].mean(axis=1)
        # Catch-all bucket for trades that outlast the final fixed bound.
        final_bound = bucket_bounds[-1][1]
        if df.shape[1] > final_bound:
            label = '{}+'.format(final_bound + 1)
            trade_df[label] = df.loc[:, (final_bound + 1):].mean(axis=1)
        return trade_df
    else:
        return df
Example #2
0
    def _process_solutions(self):
        """Post-process the raw swap solutions into ``self._processed_solutions``.

        One row is produced per entry in ``self._swaps``; rows whose
        simulation raises OptimizationError are kept as all-NaN placeholders
        so indices stay aligned with ``self._swaps``.
        """
        processed_solutions = DataFrame(columns=["reactions", "size", "fva_min", "fva_max",
                                                 "target_flux", "biomass_flux", "yield", "fitness"])

        if len(self._swaps) == 0:
            # logger.warn is a deprecated alias — logger.warning is the
            # documented method.
            logger.warning("No solutions found")
            self._processed_solutions = processed_solutions

        else:
            progress = ProgressBar(maxval=len(self._swaps), widgets=["Processing solutions: ", Bar(), Percentage()])
            for i, solution in progress(enumerate(self._swaps)):
                try:
                    processed_solutions.loc[i] = process_reaction_swap_solution(
                        self._model, solution[0], self._simulation_method, self._simulation_kwargs, self._biomass,
                        self._target, self._substrate, self._objective_function, self._swap_pairs)
                except OptimizationError as e:
                    # Record the failure but keep a placeholder row so the
                    # frame index still matches the solution index.
                    logger.error(e)
                    processed_solutions.loc[i] = [numpy.nan for _ in processed_solutions.columns]

            self._processed_solutions = processed_solutions
Example #3
0
    def __add__(self, other):
        """Concatenate the designs of two collections into a StrainDesignEnsemble.

        Rows from ``self`` and ``other`` are appended to one frame, then
        grouped on the manipulation columns so identical designs collapse.
        """
        df = DataFrame(columns=["knockouts", "knock_ins", "over_expression", "down_regulation", "type", "method"])
        for i, design in enumerate(self):
            df.loc[i] = list(design) + [design.manipulation_type, [self.__method_name__]]

        # BUG FIX: the original indexed with `i + j`, where `i` was still the
        # last index of the first loop and `j` starts at 0 — overwriting the
        # last row contributed by `self`. Append after the existing rows.
        offset = len(df)
        for j, design in enumerate(other):
            df.loc[offset + j] = list(design) + [design.manipulation_type, [self.__method_name__]]

        df = df.groupby(["knockouts", "knock_ins",
                         "over_expression", "down_regulation", "type"]).aggregate(self._aggreate_functions_)

        return StrainDesignEnsemble(df.index.tolist(), df['method'].tolist())
Example #4
0
def seq_record_to_tiles(records, feature_types=["gene"]):
    """Yield one tile DataFrame per sequence record.

    For each record, features whose type is in *feature_types* become rows
    with columns (chromosome, start, end, value, options); start is shifted
    to 1-based, value is fixed at 1 and options is empty.

    Note: the mutable default is safe here because feature_types is only
    read, never mutated; the signature is kept for backward compatibility.
    """
    for record in records:
        # Collect all rows first — one-shot construction instead of the
        # quadratic row-by-row `.loc` enlargement of the original.
        rows = [(record.id,
                 int(feature.location.start + 1),  # convert to 1-based start
                 int(feature.location.end),
                 1, "")
                for feature in record.features
                if feature.type in feature_types]
        track = DataFrame(rows, columns=["chromosome", "start", "end", "value", "options"])

        # Force integer dtype (also correct for the empty-record case).
        track["start"] = np.array(track["start"], dtype=int)
        track["end"] = np.array(track["end"], dtype=int)

        yield track
def scrapeJobs(job_postings):
    """Fetch the description text for each job posting and return a DataFrame.

    Each posting gets its ``words`` attribute set by scraping its Indeed URL;
    failures are marked "null" and those rows are dropped from the result.

    Returns a DataFrame with columns Title, Company, Location, Description.
    """
    print("Scraping text from URLs...")
    for num, job in enumerate(job_postings):
        try:
            job.words = scrapeText('http://www.indeed.ca' + job.href)
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; scraping failures are best-effort.
        except Exception:
            job.words = "null"
            print("Failed scraping job %d" % num)

    # Build all rows at once instead of row-by-row `.loc` enlargement.
    rows = [[job.job_title, job.company, job.location, job.words]
            for job in job_postings]
    df = DataFrame(rows, columns=['Title', 'Company', 'Location', 'Description'])
    df = df[df.Description != "null"]
    print("Successfully scraped text from %d jobs." % len(df))
    return df
Example #6
0
    def __add__(self, other):
        """Combine this ensemble with another design collection.

        Designs from both operands are appended to one frame and grouped on
        the manipulation columns; method provenance is carried per design
        (the other operand's methods are taken from this ensemble when it is
        also a StrainDesignEnsemble, otherwise from ``__method_name__``).
        """
        df = DataFrame(columns=["knockouts", "knock_ins", "over_expression", "down_regulation", "type", "method"])
        for i, design in enumerate(self):
            df.loc[i] = list(design) + [design.manipulation_type, self._methods]

        # BUG FIX: the original indexed with `i + j`, where `i` was still the
        # last index of the first loop and `j` starts at 0 — overwriting the
        # last row contributed by `self`. Append after the existing rows.
        offset = len(df)
        # Hoisted out of the loop: `other`'s type never changes per iteration.
        if isinstance(other, StrainDesignEnsemble):
            other_methods = self._methods
        else:
            other_methods = [self.__method_name__]
        for j, design in enumerate(other):
            df.loc[offset + j] = list(design) + [design.manipulation_type, other_methods]

        df = df.groupby(["knockouts", "knock_ins",
                         "over_expression", "down_regulation", "type"]).aggregate(self._aggreate_functions_)

        # NOTE(review): row.values[:-1] assumes the aggregated frame still
        # carries the design columns — verify against _aggreate_functions_.
        designs = [StrainDesign(row.values[:-1]) for _, row in df.iterrows()]

        return StrainDesignEnsemble(designs, df['method'].tolist())
def calculate_descriptors(smiles):
    """
    Description - Calculate descriptors using RDkit
    smile','logP','PSA','MolWt','RingCount','HeavyAtomCount','NumRotatableBonds

    Returns a DataFrame with one row per input SMILES string; descriptors
    default to 0 when the molecule cannot be parsed or computed.
    """

    if args.verbose:
        print('########################## Calculate_Descriptors ##########################')

    descriptors_df = DataFrame(columns=('smile','logP','PSA','MolWt','RingCount','HeavyAtomCount','NumRotatableBonds'))

    for i, smile in enumerate(smiles):
        # BUG FIX: reset `m` every iteration. Previously, if MolFromSmiles
        # raised, `m` silently kept the PREVIOUS iteration's molecule (or was
        # unbound on the first), so descriptors were computed for the wrong
        # molecule. Narrowed the bare `except:` clauses as well.
        m = None
        try:
            m = Chem.MolFromSmiles(smile)
        except Exception:
            print(smile)
            print('error')

        try:
            # Raises (e.g. TypeError) when m is None — handled below.
            logP = Chem.Descriptors.MolLogP(m)
            PSA = Chem.Descriptors.TPSA(m)
            MolWt = Chem.Descriptors.MolWt(m)
            RingCount = Chem.Descriptors.RingCount(m)
            HeavyAtomCount = Chem.Descriptors.HeavyAtomCount(m)
            NumRotatableBonds = Chem.Descriptors.NumRotatableBonds(m)
        except Exception:
            print('Error computing descriptors')
            logP = 0
            PSA = 0
            MolWt = 0
            RingCount = 0
            HeavyAtomCount = 0
            NumRotatableBonds = 0

        descriptors_df.loc[i] = [smile, logP, PSA, MolWt, RingCount, HeavyAtomCount, NumRotatableBonds]

    if args.verbose:
        print(descriptors_df.columns)

    return descriptors_df
Example #8
0
 def data_frame(self):
     """Return a one-column DataFrame listing each stored design's targets."""
     frame = DataFrame(columns=['targets'])
     row = 0
     for design in self._designs:
         frame.loc[row] = [design.targets]
         row += 1
     return frame
import datetime
import pandas as pd


#def InvoiceParcing(InvoiceInput):
# Script cell: extract the line items of one invoice from a JSON dump.
# NOTE(review): the commented-out def above suggests this was meant to become
# a function of InvoiceInput; it currently runs at module level instead.
InvoiceInput = 'Invoice_070787'  # invoice id to extract
checkdate = 0  # overwritten with the invoice's order_date when a row matches
# Article-name -> cluster-name mapping (headerless two-column CSV).
Grisha = read_csv('Grisha.csv',sep = ',',names=['name','ClusterName'])

# Load every invoice row from the JSON dump; `i` is the running row number.
with open('test.txt') as data_file:
    data = json.load(data_file)
    i=0
df = DataFrame(columns = ('name','price'))
for InvoiceCount in data:
    if (InvoiceInput == InvoiceCount['invoice_id']):
        # price = unit turnover (incl. VAT) times the quantity ordered
        df.loc[i]=[InvoiceCount['article_name'],InvoiceCount['turnover_inc_vat']*InvoiceCount['amount']]
        checkdate = InvoiceCount['order_date']
    i+=1  # i counts ALL rows, so df's index is sparse (original row numbers)
# (Removed: a dead, commented-out experiment that remapped each row's name
# to Grisha's ClusterName.)

# Sum the price of all invoice lines sharing the same article name.
# BUG FIX: the original iterrows loop compared the 'name' column against
# `row.name` — which in pandas is the row's *index label*, not the 'name'
# column — and discarded the result of `df.merge`, so `df` ended up emptied
# instead of aggregated. groupby performs the intended aggregation directly.
df = df.groupby('name', as_index=False)['price'].sum()
Example #10
0
# Notebook-exported scraping cells (Python 2: `print` statements and a
# list-returning `filter`). `soup`, `col` and `rows_count` are defined in
# earlier cells not shown here — TODO confirm.

# Header row: collect the non-empty cell strings of table 6, row 7 as the
# DataFrame's column names.
for c in soup('table')[6].findAll('tr')[7]:
    if c.string is not None:
        #print c.string
        col.append(c.string.strip())
col = filter(None, col)  # drop empty strings left after stripping
df = DataFrame(columns= col)
df.dropna  # NOTE(review): no-op — missing call parentheses and the result is discarded
print df.columns


# In[162]:

# Data rows: rows 8 up to the trailing footer rows; cells 0/1/4/7/10 hold the
# values of interest (cell 0 stays text, the rest are parsed as ints).
for i, row in enumerate(soup('table')[6].findAll('tr')[8:rows_count-3]):
    if row.find_all('td')[0].string:  # skip rows whose first cell is empty
        print i, row.find_all('td')[0].string, row.find_all('td')[1].string, row.find_all('td')[4].string, row.find_all('td')[7].string, row.find_all('td')[10].string
        df.loc[i] = row.find_all('td')[0].string, int(row.find_all('td')[1].string), int(row.find_all('td')[4].string), int(row.find_all('td')[7].string), int(row.find_all('td')[10].string)


# In[163]:

df  # notebook cell: display the scraped frame


# In[151]:

# Open (creating if absent) the local SQLite database for the scraped data.
# NOTE(review): `lite` is presumably `sqlite3` imported in an earlier cell — confirm.
conn = lite.connect('UN_education.db')
cur = conn.cursor()


# In[164]: