Code Example #1
# Assumed imports for this snippet (not part of the original excerpt):
import os
from pandas import ExcelWriter
from xlwings import Workbook, Range  # legacy xlwings (pre-0.9) API

def DFtoExcel(df, FolderName, FileName):
    write_df = df.loc[:, ["FileName", "hyperlink", "Sheet Name"]]

    # Path Cell_Search_By_Key
    MainFolder = "C:\\Cell_Search_By_Key"
    FolderPath = os.path.join(MainFolder, FolderName)
    if not os.path.exists(FolderPath):
        os.makedirs(FolderPath)
    os.chdir(FolderPath)
    ExcelName = "%s.xlsx" % FileName
    writer = ExcelWriter(ExcelName)
    write_df.to_excel(writer, "Result", index=False)
    writer.save()
    # turn path into hyperlink
    Excel_Path = os.path.join(FolderPath, ExcelName)
    wb = Workbook(Excel_Path)
    # wb = Workbook.caller()
    checkArr = Range("B2").vertical.value
    i = 2
    for check in checkArr:

        RangeName = "B%d" % (i)
        displayRange = "A%d" % (i)
        address = Range(RangeName).value
        display_name = Range(displayRange).value
        i += 1
        try:
            Range(RangeName).add_hyperlink(address, text_to_display=address)
        except:
            pass
    wb.save()
    wb.close()
    return "FINISH"
Code Example #2
File: actions.py  Project: Giackgamba/l4s
    def generate_report(title, description):
        """Generate Excel  1997 file from query.

        :param title: Query title.
        :param description: Query description.
        :return: Response with Excel 1997 attachment.
        """
        df = load_data_frame(request)

        # Limit the columns to the maximum allowed in Excel 97.
        max_length = 255
        index_len = len(df.index.names)

        lim_df = df.drop(df.columns[max_length - index_len - 1:len(df.columns) - 1], axis=1)

        extension = 'xls'
        engine = 'xlwt'
        encoding = 'utf-8'
        content_type = 'application/vnd.ms-excel'
        # Add content and return response
        f = NamedTemporaryFile(suffix=extension)
        ew = ExcelWriter(f.name, engine=engine, encoding=encoding)

        #print lim_df.to_string()
        #print f.name

        lim_df.to_excel(ew)
        ew.save()


        #shutil.copyfile(f.name, 'manuel.xls')

        show_legend = request.REQUEST.get('show_legend', '')
        table_description = request.REQUEST.get('table_description', '')

        add_header_and_footer(f.name, title, description, show_legend, table_description)

        title = title.strip().encode("UTF-8").replace(" ", '_')

        if len(title) > max_length_filename:
            title = title[:max_length_filename]

        filename = '%s.%s' % (title, extension)

        # Setup response
        data = f.read()

        response = HttpResponse(data)
        response["Content-Type"] = content_type
        response["Content-status_code"] = 200
        response['Content-Transfer-Encoding'] = 'binary'
        response['Content-Disposition'] = 'attachment; filename="%s"' % filename
        return response
Code Example #3
File: excel.py  Project: cpcloud/pandas
class Excel(object):

    goal_time = 0.2
    params = ['openpyxl', 'xlsxwriter', 'xlwt']
    param_names = ['engine']

    def setup(self, engine):
        N = 2000
        C = 5
        self.df = DataFrame(np.random.randn(N, C),
                            columns=['float{}'.format(i) for i in range(C)],
                            index=date_range('20000101', periods=N, freq='H'))
        self.df['object'] = tm.makeStringIndex(N)
        self.bio_read = BytesIO()
        self.writer_read = ExcelWriter(self.bio_read, engine=engine)
        self.df.to_excel(self.writer_read, sheet_name='Sheet1')
        self.writer_read.save()
        self.bio_read.seek(0)

        self.bio_write = BytesIO()
        self.bio_write.seek(0)
        self.writer_write = ExcelWriter(self.bio_write, engine=engine)

    def time_read_excel(self, engine):
        read_excel(self.bio_read)

    def time_write_excel(self, engine):
        self.df.to_excel(self.writer_write, sheet_name='Sheet1')
        self.writer_write.save()
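Outside of asv's setup/params machinery, a rough engine comparison can be sketched with the standard library's timeit. This is only an illustration, assuming openpyxl and xlsxwriter are installed; the helper name time_engine is made up here:

import timeit
from io import BytesIO

import numpy as np
import pandas as pd

def time_engine(engine, n=2000, c=5, repeat=3):
    # Build a frame similar to the asv setup() above.
    df = pd.DataFrame(np.random.randn(n, c),
                      columns=['float{}'.format(i) for i in range(c)],
                      index=pd.date_range('20000101', periods=n, freq='H'))

    def write_once():
        bio = BytesIO()
        with pd.ExcelWriter(bio, engine=engine) as writer:
            df.to_excel(writer, sheet_name='Sheet1')

    # Best of `repeat` single runs.
    return min(timeit.repeat(write_once, number=1, repeat=repeat))

for eng in ('openpyxl', 'xlsxwriter'):
    print(eng, time_engine(eng))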
Code Example #4
def create_output(regression_dist_dict, closest_curve_dict, reactor_name, name_add):
    '''Converts the dictionaries into dataframes formatted for saving as
    an Excel file. The total results go on the first sheet and the closest curves on the second.'''

    #creates a dataframe by looping through the dict and appending the df's together.
    count = 0
    print regression_dist_dict
    for key in regression_dist_dict:
        if count == 0:
            total_results = pd.DataFrame(regression_dist_dict[key], index=[key]*len(regression_dist_dict[key]), columns=['reactor', 'enrichment', 'distance'])
            closest_results = pd.DataFrame([closest_curve_dict[key]], index=[key], columns=['reactor', 'enrichment', 'distance'])
            count += 1
        else:
            total_results = total_results.append(pd.DataFrame(regression_dist_dict[key], index=[key]*len(regression_dist_dict[key]), columns=['reactor', 'enrichment', 'distance']))
            closest_results = closest_results.append(pd.DataFrame([closest_curve_dict[key]], index=[key], columns=['reactor', 'enrichment', 'distance']))

    print 'total_results', total_results
    print 'closest_results', closest_results

    file_name = 'data/%s_regression_results_%s.xlsx' % ('_'.join(map(str, reactor_name)), name_add)

    writer = ExcelWriter(file_name)

    total_results.to_excel(writer, sheet_name='Sheet1')
    closest_results.to_excel(writer, sheet_name='Sheet2')
    writer.save()
Code Example #5
def build_aggregates():

    writer = None
    years = range(2006,2010)
    for year in years:
        yr = str(year)
#        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year = yr)
        simu.set_param()
        simu.set_survey()
        inflator = get_loyer_inflator(year)
        simu.inflate_survey({'loyer' : inflator})
        simu.compute()

        agg = Aggregates()
        agg.set_simulation(simu)
        agg.compute()

        if writer is None:
            writer = ExcelWriter(str(fname_all))
        agg.aggr_frame.to_excel(writer, yr, index= False, header= True, float_format="%.2f")
        print agg.aggr_frame.to_string()
        del simu
        del agg
        import gc
        gc.collect()


    writer.save()
Code Example #6
def diag_aggregates():

    years = ['2006', '2007', '2008', '2009']

    df_final = None
    for yr in years:
        xls = ExcelFile(fname_all)
        df = xls.parse(yr, hindex_col= True)

        cols = [u"Mesure",
                u"Dépense \n(millions d'€)",
                u"Bénéficiaires \n(milliers)",
                u"Dépenses \nréelles \n(millions d'€)",
                u"Bénéficiaires \nréels \n(milliers)",
                u"Diff. relative \nDépenses",
                u"Diff. relative \nBénéficiaires"]
        selected_cols = [u"Mesure", u"Diff. relative \nDépenses", u"Diff. relative \nBénéficiaires"]
        df = df[selected_cols]
        df['year'] = yr
        df['num'] = range(len(df.index))
        df = df.set_index(['num', u'Mesure', 'year'])
        if df_final is None:
            df_final = df
        else:

            df_final = df_final.append(df, ignore_index=False)

#    DataFrame.groupby()
    df_final = df_final.sortlevel(0)
    print str(fname_all)[:-5]+'_diag.xlsx'
    writer = ExcelWriter(str(fname_all)[:-5]+'_diag.xlsx')
    df_final.to_excel(writer, sheet_name="diagnostics", float_format="%.2f")
    writer.save()
Code Example #7
File: plot.py  Project: liyistc/fantasy
def main():
  parser = argparse.ArgumentParser(description = 'Fantasy Data Visualization')
  parser.add_argument('players', metavar='PLAYER', \
                      type=int, nargs='*', help='ids of players to display')
  parser.add_argument('-d', '--display', type=int, \
                      choices=[10,25,50], default=10, help='number of rows to display')
  parser.add_argument('-e', '--excel', dest='excel', \
                      action='store_true', default=False, help='to excel')
  args = parser.parse_args()

  show = int(args.display) # number of stats to show
  stats = pd.DataFrame.from_csv('.cache/res_avg.csv')
  
  # write all stats to excel file
  if (args.excel):
    writer = ExcelWriter('.cache/res_avg.xlsx')
    stats.to_excel(writer, 'Sheet1')
    writer.save()
  
  # display plot
  if len(args.players) > 0:
    plot(stats=stats, players=args.players)

  # print short summary
  print stats.sort_values(by=['avg_2015'], ascending=[False]).head(show)
Code Example #8
def save_xls_name(list_dfs, xls_path, sheet_name):
    '''save function that takes a list as input to name sheets.'''

    #remove non-ascii characters from the dataframes before saving
    for df in list_dfs:
        df.index = remove_non_ascii(df.index)
        for col in df.columns:
            df[col] = remove_non_ascii(df[col])

    #save the df's to an excel file
    writer = ExcelWriter(xls_path)
    for n, df in enumerate(list_dfs):
        df.to_excel(writer, sheet_name[n])
    writer.save()


def remove_non_ascii(col):
    '''remove non-ascii characters for saving to excel'''
    new_index = []
    for name in col:
        try:
            for letter in name:
                if ord(letter) > 128:
                    name = name.replace(letter, '')
        except:
            pass
        new_index.append(name)
    return new_index
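For illustration, a possible call to save_xls_name; the DataFrames, file name and sheet names below are made up, and the module is assumed to already import ExcelWriter as above:

import pandas as pd

df_a = pd.DataFrame({'value': [1, 2]}, index=['alpha', 'beta'])
df_b = pd.DataFrame({'value': [3, 4]}, index=['gamma', 'delta'])

# One sheet name per DataFrame, in the same order as list_dfs.
save_xls_name([df_a, df_b], 'report.xlsx', ['First', 'Second'])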
Code Example #9
File: MyFunx.py  Project: spreee/SpreeScripts
def data_total( DocName, HistoryPath, SavePath ):
    
    files = os.listdir(HistoryPath)
    
    TotalData = pd.DataFrame()
    
    for file in files:    
        historyfile = os.path.join(HistoryPath, file)
        try:
            HistoryBook = pd.ExcelFile(historyfile)
            HistorySheet = HistoryBook.parse('Sheet1', skiprows = 0, index = None)
            
            TotalData = TotalData.append(HistorySheet)
        
        except IOError:
            print "Cannot read " + str(historyfile)
    
    TotalData.dropna(subset = ['ProductID'], inplace = True)
    TotalData.drop_duplicates(inplace = True)    
    
    filename = DocName + '.xlsx'
    filename = os.path.join(SavePath, filename)    
    
    writer = ExcelWriter(filename)
    TotalData.to_excel(writer, 'Sheet1', index = False )   
    writer.save()
    
    TotalData.to_csv(os.path.join(SavePath, DocName + '.txt'),sep=';',index=False, encoding = 'utf-8')
Code Example #10
    def save_table(self, directory = None, filename = None, table_format = None):
        '''
        Saves the table to some format
        '''
        now = datetime.now()
        if table_format is None:
            if filename is not None:
                extension = filename[-4:]
                if extension == '.xls':
                    table_format = 'xls'
                elif extension == '.csv':
                    table_format = 'csv'
            else:
                table_format = 'xls'

        if directory is None:
            directory = "."
        if filename is None:
            filename = 'Aggregates_%s.%s' % (now.strftime('%d-%m-%Y'), table_format)

        fname = os.path.join(directory, filename)

        try:
            df = self.aggr_frame
            if table_format == "xls":
                writer = ExcelWriter(str(fname))
                df.to_excel(writer, "aggregates", index= False, header= True)
                descr = self.create_description()
                descr.to_excel(writer, "description", index = False, header=False)
                writer.save()
            elif table_format == "csv":
                df.to_csv(fname, index=False, header=True)
        except Exception, e:
            raise Exception("Aggregates: Error saving file", str(e))
Code Example #11
def AddSeqComp(mypath):
    """ Loads TestLogAll.h5 from the specified path, then calls 
    MeasurementGroupTools.AddSeqComp to recalculate seq components using FFT  

    Input:  Directory of the measurment campaign, e.g.: "aLabView2"
    Output: Results1.h5, Results1.pdf in the data subdirs.
    """
    from pandas import HDFStore, ExcelWriter
    import MeasurementGroupTools as mgt

    h5logs = HDFStore(mypath + "\\" + 'TestLogsAll.h5')
    TestLog = h5logs['TestLogsAll']

    dirs = TestLog[u'DirName'].unique()
    for dname in dirs:
        mysubdirpath = mypath + "\\" + dname
        print "Processing: " + dname
        mgt.AddSeqComp(mysubdirpath, TestLog, dname)

    h5logs.put('TestLogsAll',TestLog)
    h5logs.close()

    writer = ExcelWriter(mypath + "\\" + 'TestLogsAll.xlsx')
    TestLog.to_excel(writer,'TestLogsAll') # the second argument defines sheet name
    writer.save()

    return
Code Example #12
File: mypmr.py  Project: shennjia/mypmr
    def dataIO(self, args):
        """
        IO data for possible extension
        """
        writer = ExcelWriter("{}.xlsx".format(args.logFile), engine='xlsxwriter')
        reportDf = pd.DataFrame()
        reportDf.to_excel(writer, sheet_name="Reports")
        contentDf = pd.DataFrame()
        contentDf.to_excel(writer, sheet_name="Contents")
        contentSheet = writer.sheets["Contents"]
        contentSheet.write_string(xl_rowcol_to_cell(self.sheetLinkRow, 0),
                                  "link list for all choices and sub refines".format(args.logFile))
        self.sheetLinkRow += 1

        for dfname in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']:
            if dfname in self._rawdf.keys():
                print("--save raw data for {}".format(dfname))
                self._rawdf[dfname].to_excel(writer, "{}".format(dfname))
                link_format = writer.book.add_format({'color': 'blue', 'underline': 1})
                contentSheet.write_url(xl_rowcol_to_cell(self.sheetLinkRow, 0), "internal:{}!A1".format(dfname),
                                       link_format, dfname)
                self.sheetLinkRow += 1
            if dfname in self._rawdf.keys() and dfname in ['2', '3', '5', '6', '8', '9', '11', '12']:
                self.refine(args, writer, dfname)

        # Close the Pandas Excel writer and output the Excel file.
        writer.save()
Code Example #13
    def build_and_send_email(self, data, options):
        date = timezone.now().date().strftime('%Y_%m_%d')

        if 'recipients' in options:
            print 'yes'
            recipients = options['recipients']
        else:
            print 'no'
            recipients = settings.DEFAULT_WEEKLY_RECIPIENTS

        print 'recipients:', recipients

        message = EmailMessage(subject='Kikar Hamedina, Weekly Report: %s' % date,
                               body='Kikar Hamedina, Weekly Report: %s.' % date,
                               to=recipients)
        w = ExcelWriter('Weekly_report_%s.xlsx' % date)

        for datum in data:
            # csvfile = StringIO.StringIO()
            pd.DataFrame.from_dict(datum['content']).to_excel(w, sheet_name=datum['name'])

        w.save()
        w.close()
        # f = open(w.path, 'r', encoding='utf-8')
        message.attach_file(w.path)
        message.send()
Code Example #14
def generate_response_time_stats(state_name,state_slug):
    is_state = df_requests['jurisdiction'] == state_name
                   
    state_req_ids = df_requests.loc[is_state & is_complete,'id']
    is_state_msg = df_messages['request_id'].isin(state_req_ids.tolist())
    
    df_state_msgs = df_messages[is_state_msg]
    msgs_grouped = df_state_msgs.groupby('request_id')
    
    msg_resp_times = msgs_grouped.apply(compute_response_time)
    msg_resp_times_resolved = msg_resp_times[msg_resp_times['status']=='resolved'][['public_body','response_time']]
    msg_resp_times_resolved['days'] = msg_resp_times_resolved['response_time'].dt.days
    
    resp_times_by_pbody = msg_resp_times_resolved.groupby('public_body')['days'].agg({'avg_response_time':np.mean,\
                                                                    'n_requests':np.size})
    resp_times_by_pbody.sort_values('avg_response_time',ascending=False,inplace=True)
    resp_times_by_pbody.iloc[0:15]['avg_response_time'].plot(kind='barh')
    plt.xlabel('Tage')
    plt.legend([])
    plt.title('Durchschnittliche Antwortzeiten')
    plt.savefig(figures_path+'response_times'+state_slug+'.png', bbox_inches='tight',dpi=120)
    plt.close()
    
    writer = ExcelWriter(files_path + 'response_times_per_pbody'+state_slug+'.xlsx')
    resp_times_by_pbody.to_excel(writer)
    writer.save()
    
    writer = ExcelWriter(files_path + 'response_times_raw'+state_slug+'.xlsx')
    msg_resp_times_resolved.to_excel(writer)
    writer.save()
Code Example #15
def to_mem_excel(dataframe, sheet_name='WorkSheet'):
    iobuffer = BytesIO()
    writer = ExcelWriter(iobuffer, engine='xlwt')
    dataframe.to_excel(writer, sheet_name=sheet_name)
    writer.save()
    iobuffer.flush()
    iobuffer.seek(0)
    return iobuffer.getvalue()
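For illustration, the bytes returned by to_mem_excel can be written to disk or attached to an HTTP response; a minimal sketch with an arbitrary file name (the xlwt engine only produces legacy .xls content):

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3]})
payload = to_mem_excel(df, sheet_name='Demo')

# Keep the .xls extension because xlwt writes the old binary format.
with open('demo.xls', 'wb') as fh:
    fh.write(payload)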
Code Example #16
File: cherry.py  Project: mcvmcv/cherry
	def saveDialog(self):
		'''Saves the project as an .xls file.'''
		title									= 'Save project as...'
		fileName,f								= QFileDialog.getSaveFileName(self,title,self.path)
		writer									= ExcelWriter(fileName+'.xls')
		for marker in self.markers:
			marker.table.to_excel(writer,marker.name)
		writer.save()
Code Example #17
def writeToExcel(fileName=''):
	print "Writing to Excel File : "+fileName
	data = {'CVE ID Number': cveIDNumber, 'Summary Text': summaryText, 'Publish Date': publishDate, 'Software Type': softwareType, 'Vendor': vendor,'Product':product,'Version':version,'CVSS Score':cvssScore,'Confidentiality Impact':confidentialityImpact,'Integrity Impact':integrityImpact,'Availibility Impact':availibilityImpact,'Access Complexity':accessComplexity,'Authentication':authentication,'Gained Access':gainedAccess,'Vulnerability Type':vulnType}
	df = pd.DataFrame(data,columns=['CVE ID Number','Publish Date', 'Software Type','Vendor','Product','Version','CVSS Score','Confidentiality Impact','Integrity Impact','Availibility Impact','Access Complexity','Authentication','Gained Access','Vulnerability Type','Summary Text'])
	writer = ExcelWriter(fileName)
	df.to_excel(writer,'CVE Details',index=False)
	writer.save()
	print "Completed."
Code Example #18
def save_xlsx(list_dfs, xlsx_path):
    writer = ExcelWriter(xlsx_path)
    for n, df in enumerate(list_dfs):
        df.to_excel(writer, '%s' %n)
        print('Saving %s' %n)
    writer.save()
    print('Finished writing to file')
    return None
Code Example #19
File: corpus.py  Project: estnltk/pfe
def corpus_to_excel(corpus_path, excel_path):
    '''NB! Make sure to use .xls file extension for Excel files.'''
    corpus = PyCorpus(corpus_path)
    writer = ExcelWriter(excel_path)
    for key in corpus:
        corpus[key].to_excel(writer, sheet_name=key)
    writer.save()
    corpus.close()
Code Example #20
File: data_utils.py  Project: gmartinvela/Incubator
def extract_SHT1x_data_day_by_day(SHT1x_dataframe, days_list):
    # the 'with' statement doesn't work here
    today = date.today()
    writer = ExcelWriter('static/data/SHT1x.xlsx')
    for day in days_list:
        if day <= today:
            day_SHT1x = SHT1x_dataframe[str(day)]
            day_SHT1x.to_excel(writer, sheet_name=str(day))
    writer.save()
Code Example #21
File: utils.py  Project: byeungchun/minlab
def save_peaks_excel(peakOnlyHdf5,xlsxFile):
    dsets = h5py.File(peakOnlyHdf5,'r')
    writer = ExcelWriter(xlsxFile)
    for _key in dsets.keys():
        dset = dsets[_key]
        _df = pd.DataFrame(list(dset))
        _df.to_excel(writer,_key,header=False, index=False)
        print(_key + ' sheet is created')
    writer.save()
    writer.close()
Code Example #22
File: sl_mode.py  Project: laharl/test_scripts
def slmode(sheet, size):
	writer = ExcelWriter("sw_mode_" + str(size) + "t_" + sheet + ".xlsx")
	columnas = dfs[str(sheet)].columns # store columns names
	length = len(dfs[str(sheet)].columns)
	new_df = pd.DataFrame(dfs[str(sheet)].iloc[:,0])
	for i in range(1,length-(size-1)):
		for j in range(0,(size)):
			new_df[str(columnas[j+i])] = dfs[str(sheet)].iloc[:,j+i]
		new_df.to_excel(writer,"set_" + str(i), index=False)
		new_df = pd.DataFrame(dfs[str(sheet)].iloc[:,0])
	writer.save()
Code Example #23
File: excel.py  Project: anton-khodak/gtm-app
def export_to_xls(df, path, format_excel=None, engine='xlsxwriter', send=False):
    writer = ExcelWriter(path,
                         engine=engine,
                         datetime_format='hh:mm:ss mmm d yyyy',
                         date_format='mmmm dd yyyy')
    df.to_excel(writer)
    writer.save()
    if format_excel: format_excel(path)
    if send:
        send_file_by_email(path)
    else:
        return download_file(path)
Code Example #24
File: data_utils.py  Project: gmartinvela/Incubator
def extract_thermo_data_day_by_day(thermo_dataframe, days_list):
    # the 'with' statement doesn't work here
    # replace doesn't work properly
    #thermo_dataframe_sustituted = thermo_dataframe.replace({'0': 'OFF', '1': 'ON'})
    #print thermo_dataframe_sustituted
    today = date.today()
    writer = ExcelWriter('static/data/thermo.xlsx')
    for day in days_list:
        if day <= today:
            day_thermo = thermo_dataframe[str(day)]
            day_thermo.to_excel(writer, sheet_name=str(day))
    writer.save()
Code Example #25
    def save_xls(self, dframe):  # write the data to a sheet named self.name in the Excel file named after the industry
        xls_path = os.path.join(current_folder, '筛选后股票的财务报表', self.hangye)
        if os.path.exists(xls_path):  # the Excel file already exists
            book = load_workbook(xls_path)
            writer = pd.ExcelWriter(xls_path, engine='openpyxl')
            writer.book = book
            writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            dframe.to_excel(writer, self.name)
            writer.save()
        else:  # the file does not exist yet
            writer = ExcelWriter(xls_path)
            dframe.to_excel(writer, self.name)
            writer.save()
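Note that assigning to writer.book and writer.sheets, as above, no longer works on recent pandas releases; a minimal sketch of the append-mode alternative, assuming pandas >= 1.3 with openpyxl installed (the helper name append_sheet is made up here):

import os
import pandas as pd

def append_sheet(xls_path, dframe, sheet_name):
    # Hypothetical helper: append (or replace) one sheet in a workbook.
    if os.path.exists(xls_path):
        with pd.ExcelWriter(xls_path, engine='openpyxl', mode='a',
                            if_sheet_exists='replace') as writer:
            dframe.to_excel(writer, sheet_name=sheet_name)
    else:
        with pd.ExcelWriter(xls_path, engine='openpyxl') as writer:
            dframe.to_excel(writer, sheet_name=sheet_name)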
Code Example #26
File: _classif.py  Project: EtienneCmb/brainpipe
    def to_excel(self, filename='myfile.xlsx'):
        """Export informations to a excel file

        Kargs:
            filename: string
                Name of the excel file ex: filename='myfile.xlsx'
        """
        writer = ExcelWriter(filename)
        self.clfinfo.to_excel(writer,'Classifier')
        self.statinfo.to_excel(writer,'Statistics')
        try:
            self.featinfo.to_excel(writer,'Features')
        except:
            warn('Informations about features has been ignored. Run fit()')
        writer.save()
Code Example #27
def to_excel():
    DR = data_recording.DataRecorder(db_name="PRIVATE/result.sqlite")
    sql = "Select * from rep"
    DR.con.row_factory = sqlite3.Row
    cursor = DR.con.execute(sql)

    rows = cursor.fetchall()
    DF = pd.DataFrame(rows, columns=[item[0] for item in cursor.description])

    # note: the export path must already exist.
    writer = ExcelWriter(conf_file.EXPORT_REP+'/'+'fact_excel.xlsx')
    DF.to_excel(writer, sheet_name='data_fact')
    
    writer.save()
    print("Le fichier a été sauvé dans {}".format(conf_file.EXPORT_REP+'/'+'fact_excel.xlsx'))
Code Example #28
def networkset_2_spreadsheet(ntwkset, file_name=None, file_type= 'excel', 
    *args, **kwargs):
    '''
    Write a NetworkSet object to a spreadsheet, for your boss    
    
    Write the s-parameters of each network in the networkset to a
    spreadsheet. If the `excel` file_type is used, then each network, 
    is written to its own sheet, with the sheetname taken from the
    network `name` attribute.
    This function makes use of the pandas module, which in turn makes
    use of the xlrd module. These are imported during this function.
    
    Notes
    ------
    The frequency unit used in the spreadsheet is taken from
    `ntwk.frequency.unit`
    
    Parameters
    -----------
    ntwkset :  :class:`~skrf.networkSet.NetworkSet` object
        the network to write 
    file_name : str, None
        the file_name to write. if None,  ntwk.name is used. 
    file_type : ['csv','excel','html']
        the type of file to write. See pandas.DataFrame.to_??? functions.
    form : 'db','ma','ri'
        format to write data, 
        * db = db, deg
        * ma = mag, deg
        * ri = real, imag
    \*args, \*\*kwargs :
        passed to pandas.DataFrame.to_??? functions.
        
        
    See Also
    ---------
    networkset_2_spreadsheet : writes a spreadsheet for many networks
    '''
    from pandas import DataFrame, Series, ExcelWriter # delayed because its not a requirement
    if ntwkset.name is None and file_name is None:
        raise(ValueError('Either ntwkset must have name or give a file_name'))
    
    if file_type == 'excel':
        writer = ExcelWriter(file_name)
        [network_2_spreadsheet(k, writer, sheet_name =k.name, *args, **kwargs) for k in ntwkset]
        writer.save()
    else:
        [network_2_spreadsheet(k,*args, **kwargs) for k in ntwkset]
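For illustration, a possible call, assuming scikit-rf is importable and using placeholder Touchstone file names:

import skrf as rf

# Each Network's `name` attribute (here taken from the file name) becomes a sheet name.
ns = rf.NetworkSet([rf.Network('dut_cold.s2p'), rf.Network('dut_hot.s2p')])
networkset_2_spreadsheet(ns, file_name='dut_sweep.xlsx')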
Code Example #29
def main(project_id, dataset_id, table_names):

    # create an Excel writer instance
    writer = ExcelWriter('bq_sanity_check_reports.xlsx')

    for table_name in table_names.split(";"):
        query = 'SELECT * FROM {0}.{1}'.format(dataset_id, table_name)
        
        df = convert_gbq_to_df.run(project_id, query)
    
        #Summarizing data
        df_stats = df.describe(include='all').transpose()
    
        # write to an excel sheet
        df_stats.to_excel(writer, sheet_name=table_name)
        writer.save()
Code Example #30
File: export.py  Project: austinluong/fit-extract
def export(params, path, paramsToGroupBySize, hasCycles):
    """Formats extracted data and exports to Data.xlsv"""
    paramToUnit, Files = extractFolder(params, path,
                                       paramsToGroupBySize, hasCycles)
    channelToFiles = groupFilesByChannel(Files)
    writer = ExcelWriter(path + 'Data.xlsx')  # Needed to save multiple sheets

    # Iterate through channels
    currentChannelIndex = 1
    numOfChannels = len(channelToFiles)
    for channel in channelToFiles:
        extractedValues = {p: [] for p in params}
        names = []
        cyclesColumn = []

        # Obtain list of values and names from files in channel
        for File in channelToFiles[channel]:
            if hasCycles:
                appendFileInfoCycles(File, params, extractedValues,
                                     names, cyclesColumn)
            else:
                appendFileInfo(File, params, extractedValues, names)

        # Create table / DataFrame
        table = {'{} ({})'.format(p, paramToUnit[p]): extractedValues[p]
                 for p in params}
        df = DataFrame(table)
        df.insert(0, 'File Name', names)
        if hasCycles:
            df.insert(1, 'Cycle', cyclesColumn)
        sheet = 'Ch. ' + channel

        # Add sheets and autofit column dimensions
        df.to_excel(writer, sheet_name=sheet, index=False)
        writer.sheets[sheet].column_dimensions['A'].width = len(
            max(names, key=len))

        # Message
        print('--Successfully extracted '
              'from {} ({} of {})'.format(sheet,
                                          currentChannelIndex,
                                          numOfChannels))
        currentChannelIndex += 1

    # Export
    writer.save()
    print('')
Code Example #31
def test_all_nets(fold):
    data = 2
    Server = 'shark'
    if Server == 'DL':
        parent_path = '/srv/2-lkeb-17-dl01/syousefi/TestCode/EsophagusProject/sythesize_code/'
        data_path = '/srv/2-lkeb-17-dl01/syousefi/TestCode/EsophagusProject/Data-01/BrainWeb_permutation2_low/'
    else:
        parent_path = '/exports/lkeb-hpc/syousefi/Code/ASL_LOG/debug_Log/synth-' + str(
            fold) + '/'
        data_path = '/exports/lkeb-hpc/syousefi/Synth_Data/BrainWeb_permutation2_low/'

    img_name = ''
    label_name = ''

    _rd = _read_data(data=data,
                     img_name=img_name,
                     label_name=label_name,
                     dataset_path=data_path)
    '''read path of the images for train, test, and validation'''
    train_data, validation_data, test_data = _rd.read_data_path()
    # parent_path='/srv/2-lkeb-17-dl01/syousefi/TestCode/EsophagusProject/sythesize_code/Log/synth-12/'

    chckpnt_dir = parent_path + 'unet_checkpoints/'
    result_path = parent_path + 'results/'

    if test_vali == 1:
        test_set = validation_data
    elif test_vali == 2:
        test_set = train_data
    else:
        test_set = test_data
    # image=tf.placeholder(tf.float32,shape=[batch_no,patch_window,patch_window,patch_window,1])
    # label=tf.placeholder(tf.float32,shape=[batch_no_validation,label_patchs_size,label_patchs_size,label_patchs_size,2])
    # loss_coef=tf.placeholder(tf.float32,shape=[batch_no_validation,1,1,1])

    # img_row1 = tf.placeholder(tf.float32, shape=[batch_no,patch_window,patch_window,patch_window, 1])
    # img_row2 = tf.placeholder(tf.float32, shape=[batch_no,patch_window,patch_window,patch_window, 1])
    # img_row3 = tf.placeholder(tf.float32, shape=[batch_no,patch_window,patch_window,patch_window, 1])
    # img_row4 = tf.placeholder(tf.float32, shape=[batch_no,patch_window,patch_window,patch_window, 1])
    # img_row5 = tf.placeholder(tf.float32, shape=[batch_no,patch_window,patch_window,patch_window, 1])
    # img_row6 = tf.placeholder(tf.float32, shape=[batch_no,patch_window,patch_window,patch_window, 1])
    # img_row7 = tf.placeholder(tf.float32, shape=[batch_no,patch_window,patch_window,patch_window, 1])
    # img_row8 = tf.placeholder(tf.float32, shape=[batch_no,patch_window,patch_window,patch_window, 1])
    #
    # label1 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label2 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label3 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label4 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label5 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label6 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label7 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label8 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label9 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label10 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label11 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label12 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label13 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    # label14 = tf.placeholder(tf.float32, shape=[batch_no,label_patchs_size,label_patchs_size,label_patchs_size, 1])
    img_row1 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    img_row2 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    img_row3 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    img_row4 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    img_row5 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    img_row6 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    img_row7 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    img_row8 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])

    label1 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label2 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label3 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label4 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label5 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label6 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label7 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label8 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label9 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label10 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label11 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label12 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label13 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    label14 = tf.placeholder(tf.float32, shape=[None, None, None, None, 1])
    is_training = tf.placeholder(tf.bool, name='is_training')
    input_dim = tf.placeholder(tf.int32, name='input_dim')
    # ave_huber = tf.placeholder(tf.float32, name='huber')

    forked_densenet = _forked_densenet()

    y, img_row1, img_row2, img_row3, img_row4, \
    img_row5, img_row6, img_row7, img_row8 = \
        forked_densenet.densenet(img_row1=img_row1, img_row2=img_row2, img_row3=img_row3, img_row4=img_row4,
                                 img_row5=img_row5,
                                 img_row6=img_row6, img_row7=img_row7, img_row8=img_row8, input_dim=input_dim,
                                 is_training=is_training)

    loss_instance = _loss_func()
    with tf.name_scope('averaged_mean_squared_error'):  #
        [averaged_huber, perf_loss, angio_loss
         ] = loss_instance.averaged_huber(label1=label1,
                                          label2=label2,
                                          label3=label3,
                                          label4=label4,
                                          label5=label5,
                                          label6=label6,
                                          label7=label7,
                                          label8=label8,
                                          label9=label9,
                                          label10=label10,
                                          label11=label11,
                                          label12=label12,
                                          label13=label13,
                                          label14=label14,
                                          logit1=y[:, :, :, :, 0, np.newaxis],
                                          logit2=y[:, :, :, :, 1, np.newaxis],
                                          logit3=y[:, :, :, :, 2, np.newaxis],
                                          logit4=y[:, :, :, :, 3, np.newaxis],
                                          logit5=y[:, :, :, :, 4, np.newaxis],
                                          logit6=y[:, :, :, :, 5, np.newaxis],
                                          logit7=y[:, :, :, :, 6, np.newaxis],
                                          logit8=y[:, :, :, :, 7, np.newaxis],
                                          logit9=y[:, :, :, :, 8, np.newaxis],
                                          logit10=y[:, :, :, :, 9, np.newaxis],
                                          logit11=y[:, :, :, :, 10,
                                                    np.newaxis],
                                          logit12=y[:, :, :, :, 11,
                                                    np.newaxis],
                                          logit13=y[:, :, :, :, 12,
                                                    np.newaxis],
                                          logit14=y[:, :, :, :, 13,
                                                    np.newaxis])
        cost = tf.reduce_mean(averaged_huber, name="cost")

    # ========================================================================
    ave_loss = tf.placeholder(tf.float32, name='loss')
    ave_loss_perf = tf.placeholder(tf.float32, name='loss_perf')
    ave_loss_angio = tf.placeholder(tf.float32, name='loss_angio')

    average_gradient_perf = tf.placeholder(tf.float32, name='grad_ave_perf')
    average_gradient_angio = tf.placeholder(tf.float32, name='grad_ave_angio')

    # restore the model
    sess = tf.Session()
    saver = tf.train.Saver()

    ckpt = tf.train.get_checkpoint_state(chckpnt_dir)
    saver.restore(sess, ckpt.model_checkpoint_path)

    copyfile('./test_synthesizing_net.py',
             result_path + 'test_synthesizing_net.py')

    _image_class = image_class(train_data,
                               bunch_of_images_no=1,
                               is_training=1,
                               patch_window=patch_window,
                               sample_no_per_bunch=1,
                               label_patch_size=label_patchs_size,
                               validation_total_sample=0)
    learning_rate = 1E-5
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(extra_update_ops):
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    loss = 0
    mse_angio = []
    mse_perf = []
    test_list = []
    time_list = []
    for img_indx in range(len(test_set)):
        crush, noncrush, perf, angio, spacing, direction, origin = _image_class.read_image_for_test(
            test_set=test_set,
            img_indx=img_indx,
            input_size=patch_window,
            final_layer=final_layer)
        test_set[img_indx][0][0].split('/')

        start = time.time()

        [loss_train1, out] =\
        sess.run([cost, y], feed_dict={
                                          img_row1: np.expand_dims(np.expand_dims(crush[0][:, :, :], axis=0), axis=-1),
                                          img_row2: np.expand_dims(np.expand_dims(noncrush[1][:, :, :], axis=0),
                                                                   axis=-1),
                                          img_row3: np.expand_dims(np.expand_dims(crush[2][:, :, :], axis=0), axis=-1),
                                          img_row4: np.expand_dims(np.expand_dims(noncrush[3][:, :, :], axis=0),
                                                                   axis=-1),
                                          img_row5: np.expand_dims(np.expand_dims(crush[4][:, :, :], axis=0), axis=-1),
                                          img_row6: np.expand_dims(np.expand_dims(noncrush[5][:, :, :], axis=0),
                                                                   axis=-1),
                                          img_row7: np.expand_dims(np.expand_dims(crush[6][:, :, :], axis=0), axis=-1),
                                          img_row8: np.expand_dims(np.expand_dims(noncrush[7][:, :, :], axis=0),
                                                                   axis=-1),
                                          label1: np.expand_dims(np.expand_dims(perf[0], axis=0), axis=-1),
                                          label2: np.expand_dims(np.expand_dims(perf[1], axis=0), axis=-1),
                                          label3: np.expand_dims(np.expand_dims(perf[2], axis=0), axis=-1),
                                          label4: np.expand_dims(np.expand_dims(perf[3], axis=0), axis=-1),
                                          label5: np.expand_dims(np.expand_dims(perf[4], axis=0), axis=-1),
                                          label6: np.expand_dims(np.expand_dims(perf[5], axis=0), axis=-1),
                                          label7: np.expand_dims(np.expand_dims(perf[6], axis=0), axis=-1),
                                          label8: np.expand_dims(np.expand_dims(angio[0], axis=0), axis=-1),
                                          label9: np.expand_dims(np.expand_dims(angio[1], axis=0), axis=-1),
                                          label10: np.expand_dims(np.expand_dims(angio[2], axis=0), axis=-1),
                                          label11: np.expand_dims(np.expand_dims(angio[3], axis=0), axis=-1),
                                          label12: np.expand_dims(np.expand_dims(angio[4], axis=0), axis=-1),
                                          label13: np.expand_dims(np.expand_dims(angio[5], axis=0), axis=-1),
                                          label14: np.expand_dims(np.expand_dims(angio[6], axis=0), axis=-1),
                                          is_training: False,
                                          input_dim: patch_window,
                                          ave_loss: -1,
                                          ave_loss_perf: -1,
                                          ave_loss_angio: -1,
                                          average_gradient_perf: -1,
                                          average_gradient_angio: -1
                                          })
        end = time.time()
        elapsed_time = end - start
        for i in range(np.shape(out)[-1]):
            image = out[0, :, :, :, i]
            sitk_image = sitk.GetImageFromArray(image)
            res_dir = test_set[img_indx][0][0].split('/')[-2]
            if i == 0:
                os.mkdir(parent_path + 'results/' + res_dir)
            if i < 7:
                nm = 'perf'
            else:
                nm = 'angi'
            sitk_image.SetDirection(direction=direction)
            sitk_image.SetOrigin(origin=origin)
            sitk_image.SetSpacing(spacing=spacing)
            sitk.WriteImage(
                sitk_image, parent_path + 'results/' + res_dir + '/' + nm +
                '_' + str(i % 7) + '.mha')
            print(parent_path + 'results/' + res_dir + '/' + nm + '_' +
                  str(i % 7) + '.mha done!')
        mse_p = 0
        mse_a = 0
        for i in range(7):
            if i == 0:
                os.mkdir(parent_path + 'results/' + res_dir + '/GT/')
            sitk_angio = sitk.GetImageFromArray(angio[i])
            sitk_angio.SetDirection(direction=direction)
            sitk_angio.SetOrigin(origin=origin)
            sitk_angio.SetSpacing(spacing=spacing)
            sitk.WriteImage(
                sitk_angio, parent_path + 'results/' + res_dir + '/GT/angio_' +
                str(i) + '.mha')

            mse_p = np.mean(np.power(out[0, :, :, :, i + 7] - perf[i], 2))
            mse_a = np.mean(np.power(out[0, :, :, :, i] - angio[i], 2))

            sitk_perf = sitk.GetImageFromArray(perf[i])
            sitk_perf.SetDirection(direction=direction)
            sitk_perf.SetOrigin(origin=origin)
            sitk_perf.SetSpacing(spacing=spacing)
            sitk.WriteImage(
                sitk_perf, parent_path + 'results/' + res_dir + '/GT/perf_' +
                str(i) + '.mha')

        # plt.imshow(out[0, int(gt_cube_size / 2), :, :, 0])
        # plt.figure()
        loss += loss_train1
        test_list.append(res_dir)
        mse_angio.append(mse_a)
        mse_perf.append(mse_p)
        time_list.append(elapsed_time)
        print('Loss_train: ', loss_train1)
        df = pd.DataFrame({
            'test_list': test_list,
            'mse_angio': mse_angio,
            'mse_perf': mse_perf,
            'elapsed_time': time_list
        })
        writer = ExcelWriter(parent_path + 'results/output.xlsx')
        df.to_excel(writer, 'Sheet1', index=False)
        writer.save()
    print('Total loss: ', loss / len(test_set))
Code Example #32
    def export_csv(self, request):
        elearning_session = ELearningSession.objects.all()

        field_names = [
            'id', 'quiz', 'session', 'category', 'sub_category', 'figure',
            'content', 'explanation', 'correct', 'answer1', 'answer2',
            'answer3'
        ]

        id_list = []
        elearning_name_list = []
        session_list = []
        category = []
        sub_category = []
        figure_list = []
        content = []
        explanation_list = []
        correct = []
        answer1 = []
        answer2 = []
        answer3 = []
        count = 1
        for q in elearning_session:

            try:
                for slide in q.slides.all().values():
                    # if q.elearning in elearning_name_list:
                    # 	session_list.append(q.number)
                    # else:
                    # 	count = 1
                    session_list.append(q.number)
                    id_list.append(count)
                    elearning_name_list.append(q.elearning)
                    category.append("n")
                    sub_category.append("n")
                    figure_list.append(slide['image'])
                    content.append("n")
                    explanation_list.append("n")
                    correct.append("n")
                    answer1.append("n")
                    answer2.append("n")
                    answer3.append("n")

                for question in q.questions.all().values():
                    id_list.append(count)
                    # if q.elearning in elearning_name_list:
                    # 	session_list.append(count)
                    # else:
                    # 	count = 1
                    session_list.append(q.number)
                    elearning_name_list.append(q.elearning)
                    category.append(question['category'])
                    sub_category.append(question['sub_category'])
                    figure_list.append("n")
                    content.append(question['text'])
                    explanation_list.append(question['explanation'])
                    correct_answer = Answer.objects.filter(
                        question=question['id'],
                        correct=True).values_list('text')
                    other_answers = Answer.objects.filter(
                        question=question['id'],
                        correct=False).values_list('text')
                    correct.append(correct_answer[0][0])
                    try:
                        answer1.append(other_answers[0][0])
                    except:
                        answer1.append("")
                    try:
                        answer2.append(other_answers[1][0])
                    except:
                        answer2.append("")
                    try:
                        answer3.append(other_answers[2][0])
                    except:
                        answer3.append("")
            except:
                continue
            else:
                count += 1

        data = {
            'id': id_list,
            'quiz': elearning_name_list,
            'session': session_list,
            'category': category,
            'sub_category': sub_category,
            'figure': figure_list,
            'content': content,
            'explanation': explanation_list,
            'correct': correct,
            'answer1': answer1,
            'answer2': answer2,
            'answer3': answer3
        }
        df = pandas.DataFrame(data, columns=field_names)

        df = df.dropna()

        writer = ExcelWriter('Elearning-db.xlsx')
        df.to_excel(writer, 'Elearning', index=False)
        writer.save()

        path = "Elearning-db.xlsx"

        if os.path.exists(path):
            with open(path, "rb") as excel:
                data = excel.read()

            response = HttpResponse(data,
                                    content_type='application/vnd.ms-excel')
            response[
                'Content-Disposition'] = 'attachment; filename="db_elearning.xlsx"'
        return response
Code Example #33
File: crawler.py  Project: miloshIra/Collatz
def write_to_excel():
    writer = ExcelWriter('Books.xlsx')
    df.to_excel(writer, 'Denes')
    writer.save()
Code Example #34
File: general.py  Project: nikosavola/scikit-rf
def networkset_2_spreadsheet(ntwkset: 'NetworkSet',
                             file_name: str = None,
                             file_type: str = 'excel',
                             *args,
                             **kwargs):
    '''
    Write a NetworkSet object to a spreadsheet, for your boss

    Write the s-parameters of each network in the networkset to a
    spreadsheet. If the `excel` file_type is used, then each network,
    is written to its own sheet, with the sheetname taken from the
    network `name` attribute.
    This function makes use of the pandas module, which in turn makes
    use of the xlrd module. These are imported during this function.

    Notes
    ------
    The frequency unit used in the spreadsheet is taken from
    `ntwk.frequency.unit`

    Parameters
    -----------
    ntwkset :  :class:`~skrf.networkSet.NetworkSet` object
        the network to write
    file_name : str, None
        the file_name to write. if None,  ntwk.name is used.
    file_type : ['csv','excel','html']
        the type of file to write. See pandas.DataFrame.to_??? functions.
    form : 'db','ma','ri'
        format to write data,
        * db = db, deg
        * ma = mag, deg
        * ri = real, imag
    \*args, \*\*kwargs :
        passed to pandas.DataFrame.to_??? functions.


    See Also
    ---------
    networkset_2_spreadsheet : writes a spreadsheet for many networks
    '''
    from pandas import DataFrame, Series, ExcelWriter  # delayed because its not a requirement
    if ntwkset.name is None and file_name is None:
        raise (ValueError('Either ntwkset must have name or give a file_name'))
    if file_name is None:
        file_name = ntwkset.name

    if file_type == 'excel':
        # add file extension if missing
        if not file_name.endswith('.xlsx'):
            file_name += '.xlsx'
        writer = ExcelWriter(file_name)
        [
            network_2_spreadsheet(k,
                                  writer,
                                  sheet_name=k.name,
                                  *args,
                                  **kwargs) for k in ntwkset
        ]
        writer.save()
    else:
        [network_2_spreadsheet(k, *args, **kwargs) for k in ntwkset]
Code Example #35
def main_f():
    xl1 = pd.ExcelFile("rockpaperseaser_new1.xlsx")
    df = xl1.parse("Sheet1")

    df = df.replace('r', 1)
    df = df.replace('p', 2)
    df = df.replace('s', 3)

    df[[]]

    out_priority1 = ''
    if (df['Result'][len(df) - 1] == 'loss'
            and df['Result'][len(df) - 2] == 'loss'):
        i = random.randint(1, 3)
        if (i == 1):
            out_priority1 = 'r'
        elif (i == 2):
            out_priority1 = 'p'
        else:
            out_priority1 = 's'

    out_priority2 = ''
    if (df['my pick'][len(df) - 1] == df['my pick'][len(df) - 2]):
        if (df['my pick'][len(df) - 1] == 1):
            out_priority2 = 's'
        elif (df['my pick'][len(df) - 1] == 2):
            out_priority2 = 'r'
        else:
            out_priority2 = 'p'

    #df = pd.read_xlsx(file_name)
    l_d = []
    for i in range(4, len(df)):
        l_d.append([
            df['my pick'][i], df['my pick'][i - 1], df['my pick'][i - 2],
            df['my pick'][i - 3], df['my pick'][i - 4], df['computer pick'][i],
            df['computer pick'][i - 1], df['computer pick'][i - 2],
            df['computer pick'][i - 3], df['computer pick'][i - 4]
        ])
    df1 = pd.DataFrame(l_d,
                       columns=[
                           'my_pick', 'my_p_1', 'my_p_2', 'my_p_3', 'my_p_4',
                           'computer_pick', 'com_p_1', 'com_p_2', 'com_p_3',
                           'com_p_4'
                       ])

    df1['p_pred'] = (df1['computer_pick'] == 2) * 1

    df1['r_pred'] = (df1['computer_pick'] == 1) * 1

    df1['s_pred'] = (df1['computer_pick'] == 3) * 1

    df_X = df1.drop(
        {
            'p_pred', 'r_pred', 's_pred', 'my_pick', 'computer_pick', 'my_p_4',
            'com_p_4', 'my_p_3', 'com_p_3'
        },
        axis=1)

    # 2. FIT

    # 3. Transform
    onehotlabels = enc.transform(df_X).toarray()
    new_data = onehotlabels[onehotlabels.shape[0] - 1]

    p_pred = rf_p.predict(new_data.reshape(1, -1))
    r_pred = rf_r.predict(new_data.reshape(1, -1))
    s_pred = rf_s.predict(new_data.reshape(1, -1))

    #adding randomness in draw
    cond_prob = p_pred + r_pred + s_pred

    x = uniform(low=0, high=1, size=1)

    sel = [
        p_pred / cond_prob, (p_pred + r_pred) / cond_prob,
        (p_pred + r_pred + s_pred) / cond_prob
    ]
    out_priority3 = ''
    if (x < sel[0]):
        out_priority3 = 's'
    elif (x < sel[1]):
        out_priority3 = 'p'
    else:
        out_priority3 = 'r'
    my = ''
    if (out_priority1 != ''):
        print('play with ' + out_priority1)
        my = out_priority1
    elif (out_priority2 != ''):
        print('play with ' + out_priority2)
        my = out_priority2
    else:
        print('play with ' + out_priority3)
        my = out_priority3
    #my = input('What did you play?(r,p,s) ')
    computer = input('What did the AI play?(r,p,s) ')
    res = input('Did you win(y/n/t) ')
    result = ''

    if (res == 'y'):
        result = 'win'
    elif (res == 'n'):
        result = 'loss'
    else:
        result = 'tie'

    usr = [[my, computer, result]]
    usr_input = pd.DataFrame(usr,
                             columns=['my pick', 'computer pick', 'Result'])
    final = xl1.parse("Sheet1")
    final = final.append(usr_input)
    #final.reset_index(drop=True)

    writer = ExcelWriter('rockpaperseaser_new1.xlsx')
    final.to_excel(writer, 'Sheet1', index=False)
    writer.save()
Code Example #36
File: excel.py  Project: tugay-ai/pandas
 def time_write_excel(self, engine):
     bio_write = BytesIO()
     bio_write.seek(0)
     writer_write = ExcelWriter(bio_write, engine=engine)
     self.df.to_excel(writer_write, sheet_name="Sheet1")
     writer_write.save()
Code Example #37
File: DAG file.py  Project: dashashu/airflowDAG
def ModifyInputfiles():
    def get_excel_files(path):
        return [f for f in listdir(path) if isfile(join(path, f))]

    dirpath = '/home/airflow/gcs/data/'
    print("Dirpath:" + dirpath)
    if not os.path.exists(os.path.join(dirpath, 'in-data')):
        os.mkdir(os.path.join(dirpath, 'in-data'))
    OutputFolder = os.path.join(dirpath, 'in-data')

    inputfilepath = '/home/airflow/gcs/data/input'
    mappingFilePath = '/home/airflow/gcs/data/map'

    mapping = get_excel_files(mappingFilePath)
    inputfilelist = get_excel_files(inputfilepath)

    for map_file in mapping:  #if in future we need to take multiple files
        print("running file:" + map_file)
        mapping_exten = map_file.split('.')[1]
        if mapping_exten == 'csv':
            mapping_df = pd.read_csv(os.path.join(mappingFilePath, map_file),
                                     sep=';')
        elif mapping_exten == 'xlsx':
            mapping_df = pd.read_excel(os.path.join(mappingFilePath, map_file))
    mapping_df = mapping_df.loc[:,
                                mapping_df.columns.intersection([
                                    'VM', 'VNF Name', 'VNF Program name',
                                    'VNF vendor name', 'Site Name'
                                ])]
    mapping_df = mapping_df.rename(columns={
        'VNF Program name': 'Program Name',
        'VNF vendor name': 'Vendor'
    })
    writer = ExcelWriter(
        os.path.join(OutputFolder,
                     'FS_Input_' + time.strftime("%Y_%m_%d") + '.xlsx'))
    final_input_df = pd.DataFrame()
    for input in inputfilelist:  #if in future we need to take multiple files
        print("Running file: " + input)
        input_exten = input.split('.')[1]
        file_name = input.split('.')[0]
        site_name = file_name.split('_')[0]
        if input_exten == 'csv':
            input_df = pd.read_csv(os.path.join(inputfilepath, input), sep=';')
        elif input_exten == 'xlsx':
            input_df = pd.read_excel(os.path.join(inputfilepath, input))
        input_df = input_df.rename(columns=lambda x: x.strip())

        input_df['Disk Average Read Bytes Per Second (kB/s)'] = pd.to_numeric(
            input_df['Disk Average Read Bytes Per Second (kB/s)'].astype(
                str).replace('.', ''),
            errors='coerce')
        input_df['Disk Average Read Bytes Per Second (kB/s)'] = pd.to_numeric(
            input_df['Disk Average Read Bytes Per Second (kB/s)'],
            downcast="float")
        input_df['Disk Average Write Bytes Per Second (kB/s)'] = pd.to_numeric(
            input_df['Disk Average Write Bytes Per Second (kB/s)'].astype(
                str).replace('.', ''),
            errors='coerce')
        input_df['Disk Average Write Bytes Per Second (kB/s)'] = pd.to_numeric(
            input_df['Disk Average Write Bytes Per Second (kB/s)'],
            downcast="float")
        #input_df['Site Name'] = site_name

        input_df = input_df.rename(columns=lambda x: x.lower())
        final_input_df = final_input_df.append(input_df)

    final_input_df = final_input_df.rename(
        columns={
            'site name':
            'SiteName',
            'start time':
            'timestamp',
            'vm':
            'VM',
            'cpu usage %':
            'CPU usage (%)',
            'memory usage %':
            'Memory Usage (%)',
            'disk usage %':
            'Disk Usage (%)',
            'disk average read bytes per second (kb/s)':
            'Disk Average Read Bytes Per Second (kB/s)',
            'disk average write bytes per second (kb/s)':
            'Disk Average Write Bytes Per Second (kB/s)'
        })
    final_input_df = pd.merge(final_input_df, mapping_df, on='VM', how='left')

    # convert_objects() was removed from pandas; pd.to_numeric is the replacement
    final_input_df['Disk Usage (%)'] = pd.to_numeric(
        final_input_df['Disk Usage (%)'], errors='coerce')
    final_input_df['Memory Usage (%)'] = pd.to_numeric(
        final_input_df['Memory Usage (%)'], errors='coerce')
    final_input_df['CPU usage (%)'] = pd.to_numeric(
        final_input_df['CPU usage (%)'], errors='coerce')

    final_input_df['Disk Usage (%)'] = np.round(
        final_input_df['Disk Usage (%)'].astype(float), 3)
    final_input_df['Memory Usage (%)'] = np.round(
        final_input_df['Memory Usage (%)'].astype(float), 3)
    final_input_df['CPU usage (%)'] = np.round(
        final_input_df['CPU usage (%)'].astype(float), 3)
    final_input_df.to_excel(writer, index=False)
    writer.save()
    writer.close()
    print('end of process')
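DataFrame.append, used above to stack the per-file frames, was removed in pandas 2.0. A minimal sketch of the same accumulation with pd.concat; the directory path is copied from the example and only illustrative.

# Sketch only, not the original DAG code; assumes .xlsx inputs in input_dir.
import os
import pandas as pd

input_dir = '/home/airflow/gcs/data/input'
frames = [pd.read_excel(os.path.join(input_dir, f))
          for f in os.listdir(input_dir) if f.endswith('.xlsx')]
final_input_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()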
コード例 #38
0
def write_xlsx(df, name_file):
    writer = ExcelWriter(f'{name_file}.xlsx')
    df.to_excel(writer, 'Sheet1')
    writer.save()
    return 'ФАЙЛ СОХРАНЕН'
コード例 #39
0
ファイル: GeneExpression.py プロジェクト: marcomass/genereg
def extract_expression(tumor, platform, gencode_version):

	"""
	The EXTRACT_EXPRESSION operation extracts expression values from TCGA for all the genes of interest and their candidate regulatory genes. Intermediate results files are exported locally during the execution of the function, while the final dataframes are returned as Pandas dataframes and exported locally in the Excel files 'Gene Expression - InterestGenes.xlsx' and 'Gene Expression - RegulatoryGenes.xlsx'.

	:param tumor: full name of the tumor of interest, encoded as a string (e.g. 'Ovarian Serous Cystadenocarcinoma', 'Breast Invasive Carcinoma', ...)
	:param platform: number identifying the sequencing platform (either 27 for the 27k probes sequencing platform or 450 for the 450k probes sequencing platform)
	:param gencode_version: number representing the GENCODE genomic annotations to use (currently, for assembly GRCh38, versions 22, 24 and 27 can be used)
	:return: two Pandas dataframes

	Example::
	
		import genereg as gr
		expr_interest_df, expr_regul_df = gr.GeneExpression.extract_expression(tumor='Ovarian Serous Cystadenocarcinoma', platform=27, gencode_version=22)
	"""

	# Check input parameters
	tcga_tumors = ["Acute Myeloid Leukemia","Adrenocortical Carcinoma","Bladder Urothelial Carcinoma","Brain Lower Grade Glioma" ,"Breast Invasive Carcinoma","Cervical Squamous Cell Carcinoma and Endocervical Adenocarcinoma","Cholangiocarcinoma","Colon Adenocarcinoma","Esophageal Carcinoma","Glioblastoma Multiforme","Head and Neck Squamous Cell Carcinoma","Kidney Chromophobe","Kidney Renal Clear Cell Carcinoma","Kidney Renal Papillary Cell Carcinoma","Liver Hepatocellular Carcinoma","Lung Adenocarcinoma","Lung Squamous Cell Carcinoma","Lymphoid Neoplasm Diffuse Large B-cell Lymphoma","Mesothelioma","Ovarian Serous Cystadenocarcinoma","Pancreatic Adenocarcinoma","Pheochromocytoma and Paraganglioma","Prostate Adenocarcinoma","Rectum Adenocarcinoma","Sarcoma","Skin Cutaneous Melanoma","Stomach Adenocarcinoma","Testicular Germ Cell Tumors","Thymoma","Thyroid Carcinoma","Uterine Carcinosarcoma","Uterine Corpus Endometrial Carcinoma","Uveal Melanoma"]
	if tumor not in tcga_tumors:
		raise ValueError('PATHOLOGY NOT SUPPORTED! You can analyze one of these 33 types of TCGA tumors: '+(', '.join(tcga_tumors)))
	
	if platform not in [27, 450]:
		raise ValueError('PLATFORM NOT RECOGNIZED! Sequencing platforms available: 27 and 450')
	
	if gencode_version not in [22, 24, 27]:
		raise ValueError('GRCh38 GENCODE versions available are 22, 24 and 27')
	
	
	# Load the list of genes of interest
	EntrezConversion_df = pd.read_excel('./Genes_of_Interest.xlsx',sheetname='Sheet1',header=0,converters={'GENE_SYMBOL':str,'ENTREZ_GENE_ID':str,'GENE_SET':str})
	
	# Create a list containing the Gene Symbols of the genes of interest
	genesSYM_of_interest = []
	for i, r in EntrezConversion_df.iterrows():
		sym = r['GENE_SYMBOL']
		if sym not in genesSYM_of_interest:
			genesSYM_of_interest.append(sym)

	# Import the dictionary of genes of interest with their candidate regulatory genes
	dict_RegulGenes = pickle.load(open('./2_Regulatory_Genes/dict_RegulGenes.p', 'rb'))

	# Import the gene-TFs mapping dataframe 
	Mapping_df = pd.read_excel('./0_Genes_Mapping/Genes Mapping.xlsx',sheetname='Sheet1',header=0,converters={'ENTREZ_GENE_ID':str,'HGNC_ID':str})

	# Create a list containing the Gene Symbols of the regulatory genes of genes of interest
	regulatory_genesSYM = []
	for key, value in dict_RegulGenes.items():
		for gene in value:  
			if gene not in regulatory_genesSYM:
				regulatory_genesSYM.append(gene)

	# Extract the list of distinct Gene Symbols mapped in the mapping table
	mapped_gene_SYMs = []
	for index, row in Mapping_df.iterrows():
		sym = row['GENE_SYMBOL']
		if sym not in mapped_gene_SYMs:
			mapped_gene_SYMs.append(sym)


	# Execute the query for the extraction of gene expression values on the remote server, using the PyGMQL Python library
	gl.set_remote_address('http://gmql.eu/gmql-rest/')
	gl.login()
	gl.set_mode('remote')

	# Load the TCGA datasets to be used in the query
	methylation_dataset = gl.load_from_remote(remote_name='GRCh38_TCGA_methylation', owner='public')  
	expression_dataset = gl.load_from_remote(remote_name='GRCh38_TCGA_gene_expression', owner='public') 

	# Identify the sequencing platform to be used
	if platform == 27:
		seq_platform = 'Illumina Human Methylation 27'
	elif platform == 450:
		seq_platform = 'Illumina Human Methylation 450'
	
	# Extract all the samples for the current tumor and platform
	all_methyl = methylation_dataset.meta_select((methylation_dataset['manually_curated__cases__disease_type'] == tumor) & (methylation_dataset['manually_curated__platform'] == seq_platform) & ((methylation_dataset['biospecimen__bio__sample_type'] == 'Primary Tumor') | (methylation_dataset['biospecimen__bio__sample_type'] == 'Recurrent Tumor')) & (methylation_dataset['clinical__shared__history_of_neoadjuvant_treatment'] == 'No'))
	all_expr = expression_dataset.meta_select((expression_dataset['manually_curated__cases__disease_type'] == tumor) & ((expression_dataset['biospecimen__bio__sample_type'] == 'Primary Tumor') | (expression_dataset['biospecimen__bio__sample_type'] == 'Recurrent Tumor')) & (expression_dataset['clinical__shared__history_of_neoadjuvant_treatment'] == 'No'))

	# Gene Expression:
	expr_0 = all_expr.reg_project(field_list=['ensembl_gene_id','entrez_gene_id','gene_symbol','fpkm'])
	expr = expr_0.meta_select(semiJoinDataset=all_methyl, semiJoinMeta=['biospecimen__bio__bcr_sample_barcode'])

	# Materialize the results into a GDataframe
	expr_Gdf = expr.materialize('./(MaterializeResults)')


	# The result dataset is loaded as a GDataframe, an object containing two pandas dataframes, one for the region data and one for the metadata.
	# Get the two pandas dataframes:
	expr_df_regs = expr_Gdf.regs
	expr_df_meta = expr_Gdf.meta
	n_regs = len(expr_df_regs)
	n_samples = len(expr_df_meta)

	# Rename 'chr', 'start', and 'stop' columns header
	expr_df_regs.rename(columns={'chr':'chrom','start':'left','stop':'right'}, inplace=True)
	# Change index into progressive integer numbers and store the name of the sample in another column
	expr_df_regs['sample_id'] = expr_df_regs.index
	expr_df_regs.index = range(n_regs)

	# Convert unknown values (NaN) to empty strings
	expr_df_regs = expr_df_regs.fillna('')

	# Convert all the metadata values into strings, since they're encoded as lists in Python
	col_names = []
	for name, values in expr_df_meta.iteritems():
		col_names.append(name)
	for index, row in expr_df_meta.iterrows():
		for c in col_names:
			list_val = row[c] # it's encoded as a list
			str_val = ''.join(list_val)  # convert the value stored as a list in a string
			expr_df_meta.set_value(index,c,str_val)

		
	# Since we have to extract the expression values for each distinct sample barcode (aliquot), we create a list containing these distinct identifiers
	expr_sample_barcodes_all = []
	for index, row in expr_df_meta.iterrows():
		barcode = row['biospecimen__bio__bcr_sample_barcode']    
		if barcode not in expr_sample_barcodes_all: # get distinct values
			expr_sample_barcodes_all.append(barcode)
        
	# Check which are repeated aliquots, if present
	all_aliquots = []
	for index, row in expr_df_meta.iterrows():
		barcode = row['biospecimen__bio__bcr_sample_barcode']
		all_aliquots.append(barcode)
	multiple_aliquots = [item for item, count in collections.Counter(all_aliquots).items() if count > 1]

	samples_to_remove = []
	expr_sample_barcodes = []
	if len(multiple_aliquots) != 0:    
		# Among the repeated aliquots, keep only the most recent ones (shipped in 2013) and drop the 2011 ones
		for index, row in expr_df_meta.iterrows():
			year = row['biospecimen__bio__year_of_shipment']
			barcode = row['biospecimen__bio__bcr_sample_barcode']  
			if (barcode in multiple_aliquots) and year == '2011':
				expr_df_meta.drop(index, inplace=True)
				samples_to_remove.append(index)

		# Import the list of aliquots in the methylation dataset 
		text_file = open(common_aliquots, 'r')
		aliquots = text_file.read().split('\n')
		aliquots.remove('')
		text_file.close()
			
		# Extract the new list of distinct TCGA Aliquots to extract
		for index, row in expr_df_meta.iterrows():
			barcode = row['biospecimen__bio__bcr_sample_barcode'] 
			if barcode in aliquots:
				if barcode not in expr_sample_barcodes:
					expr_sample_barcodes.append(barcode)        
			else:
				expr_df_meta.drop(index, inplace=True)
				samples_to_remove.append(index)
			
		# Remove regions that corresponded to eliminated repeated aliquots
		expr_df_regs = expr_df_regs.loc[~(expr_df_regs['sample_id'].isin(samples_to_remove))].copy()

	else:
		expr_sample_barcodes = expr_sample_barcodes_all		

		
	# Export the metadata dataframe setting the TCGA aliquots as indexes.
	Metadata_df = expr_df_meta.copy()
	Metadata_df['id_sample'] = Metadata_df.index
	Metadata_df.set_index('biospecimen__bio__bcr_sample_barcode', inplace=True)
	writer = ExcelWriter('./3_TCGA_Data/Gene_Expression/EXPR (Metadata).xlsx')
	Metadata_df.to_excel(writer,'Sheet1')
	writer.save()	


	# Extract from the expression dataset all the regions that belong to genes of interest
	expr_df_regs_interest = expr_df_regs.loc[expr_df_regs['gene_symbol'].isin(genesSYM_of_interest)].copy()
	# Extract from the expression dataset all the regions that belong to regulatory genes of genes of interest
	expr_df_regs_regulatory = expr_df_regs.loc[expr_df_regs['gene_symbol'].isin(regulatory_genesSYM)].copy()


	# Gene expression values for each gene of interest:

	# Create a dictionary for storing all the gene expression values for each gene of interest and for each aliquot TCGA
	from collections import defaultdict
	dict_expr_interest = defaultdict(dict)

	for key, value in dict_expr_interest.items():
		value = defaultdict(list)

	# The main dictionary has the Gene Symbols of the genes of interest as keys and each gene has another dictionary as value, which, in turn, has the different aliquots as keys and lists as values.
	# The idea is to have, for each gene and each TCGA aliquot, a list containing all of its fpkm values.

	# Set the Gene Symbol as keys of the main dictionary
	for name in genesSYM_of_interest:
		dict_expr_interest[name] = {}

	# Set the names of the samples barcodes as keys for each dictionary set as value of a specific key (genes)
	for sample in expr_sample_barcodes:
		for k, v in dict_expr_interest.items():
			v[sample] = []
			
	# Set the values by appending the expression values for each gene of interest: these expression values (fpkm) can be found in the 'expr_df_regs_interest' dataframe
	for index, row in expr_df_regs_interest.iterrows():   # iterating along the whole dataframe
		sym = row['gene_symbol']  # get the Gene Symbol of the gene
		fpkm = row['fpkm']  # get the gene expression value
		sample = row['sample_id']  # get the name of the sample
		# get the aliquot corresponding to current sample
		aliq = expr_df_meta.get_value(sample, 'biospecimen__bio__bcr_sample_barcode')  
		# add the value according to the correct gene ID and TCGA aliquot, rounding it to a float with maximum 6 decimal numbers,
		dict_expr_interest[sym][aliq].append(round(float(fpkm),6))
		

	# Convert the nested dictionary also into a dataframe

	# Create a dataframe whose row indexes are the different TCGA samples and the columns are the distinct genes of interest
	expr_interest_df1 = pd.DataFrame(index = expr_sample_barcodes, columns = [genesSYM_of_interest])

	# Add three additional columns for the sample name, the tumor code and the patient ID corresponding to each aliquot, so that they are available later if needed
	expr_interest_df2 = pd.DataFrame(index = expr_sample_barcodes, columns = ['Sample_ID','Tumor','Patient_ID'])

	# Create the final dataframe
	expr_interest_df = expr_interest_df1.join(expr_interest_df2)

	# Fill the previously created dataframe with the correct gene expression values, for each gene of interest and for each TCGA aliquot            
	for gene_sym, dict_value in dict_expr_interest.items():
		for tcga_aliq, exp_list in dict_value.items():
			if (len(exp_list) != 0):
				fpkm = exp_list[0]
				# add the expression value in the proper cell of the dataframe, rounding it to a float with maximum 6 decimal numbers
				expr_interest_df.set_value(tcga_aliq,gene_sym,round(fpkm,6))
				

	# Add to the dataframe the name of each sample, the tumor code and the patient's ID in correspondence of each TCGA aliquot
	for index, row in expr_df_meta.iterrows():
		aliquot = row['biospecimen__bio__bcr_sample_barcode']
		tumor_tag = row['clinical__admin__disease_code']
		patient_id = row['clinical__shared__patient_id']
		expr_interest_df.set_value(aliquot,'Sample_ID',index)
		expr_interest_df.set_value(aliquot,'Tumor',tumor_tag)
		expr_interest_df.set_value(aliquot,'Patient_ID',patient_id)
		
	# Add a row at the beginning of the dataframe to insert also the Entrez Gene ID of each gene of interest
	additional_index = ['ENTREZ_GENE_ID']
	expr_interest_df0_1 = pd.DataFrame(index = additional_index, columns = [genesSYM_of_interest])
	expr_interest_df0_2 = pd.DataFrame(index = additional_index, columns = ['Sample_ID','Tumor','Patient_ID'])
	expr_interest_df0 = expr_interest_df0_1.join(expr_interest_df0_2)

	frames = [expr_interest_df0, expr_interest_df]
	expr_interest_df = pd.concat(frames)

	# Add for each Gene Symbol of our genes of interest the corresponding Entrez Gene ID in the first row of the dataframe
	for i, r in EntrezConversion_df.iterrows():
		entrez_id = r['ENTREZ_GENE_ID']
		gene_name = r['GENE_SYMBOL']
		expr_interest_df.set_value('ENTREZ_GENE_ID',gene_name,entrez_id)

	# Set empty strings for NaN values in the 'ENTREZ_GENE_ID' row
	expr_interest_df.set_value('ENTREZ_GENE_ID','Sample_ID',"")
	expr_interest_df.set_value('ENTREZ_GENE_ID','Tumor',"")
	expr_interest_df.set_value('ENTREZ_GENE_ID','Patient_ID',"")


	# Export the dataframe with the gene expression values for our genes of interest for each TCGA aliquot 
	writer = ExcelWriter('./3_TCGA_Data/Gene_Expression/Gene Expression - InterestGenes.xlsx')
	expr_interest_df.to_excel(writer,'Sheet1')
	writer.save()


	# Gene expression values for each candidate regulatory gene of the genes of interest:

	# Create a dictionary for storing all the gene expression values for each gene of interest and for each aliquot TCGA
	from collections import defaultdict
	dict_expr_regulatory = defaultdict(dict)

	for key, value in dict_expr_regulatory.items():
		value = defaultdict(list)

	# The main dictionary has the Gene Symbols of the candidate regulatory genes as keys and each gene has another dictionary as value, which, in turn, has the different aliquots as keys and lists as values.
	# The idea is to have, for each gene and each TCGA aliquot, a list containing all of its fpkm values.

	# Set the Gene Symbols as keys of the main dictionary
	for name in regulatory_genesSYM:
		dict_expr_regulatory[name] = {}

	# Set the names of the samples barcodes as keys for each dictionary set as value of a specific key (genes)
	for sample in expr_sample_barcodes:
		for k, v in dict_expr_regulatory.items():
			v[sample] = []
        
	# Set the values by appending the expression values for each candidate regulatory gene: these expression values (fpkm) can be found in the "expr_df_regs_regulatory" dataframe
	for index, row in expr_df_regs_regulatory.iterrows():   # iterating along the whole dataframe
		sym = row['gene_symbol']  # get the Gene Symbol of the gene
		ens_id = row['ensembl_gene_id']  # get the Ensembl Gene ID
		fpkm = row['fpkm']  # get the gene expression value
		sample = row['sample_id']  # get the name of the sample
		# get the aliquot corresponding to current sample
		aliq = expr_df_meta.get_value(sample, 'biospecimen__bio__bcr_sample_barcode')
		# add the value according to the correct gene ID and TCGA aliquot, rounding it to a float with maximum 6 decimal numbers
		if (gencode_version == 22):
			if (ens_id not in ['ENSG00000277726.3','ENSG00000275895.3','ENSGR0000214717.8']):
				dict_expr_regulatory[sym][aliq].append(round(float(fpkm),6))
		else:
			dict_expr_regulatory[sym][aliq].append(round(float(fpkm),6))
	


	# Convert the nested dictionary also into a dataframe

	# Create a dataframe whose row indexes are the different TCGA samples and the columns are the distinct candidate regulatory genes
	expr_regulatory_df1 = pd.DataFrame(index = expr_sample_barcodes, columns = [regulatory_genesSYM])

	# Add three additional columns for the sample name, the tumor code and the patient ID corresponding to each aliquot, so that they are available later if needed
	expr_regulatory_df2 = pd.DataFrame(index = expr_sample_barcodes, columns = ['Sample_ID','Tumor','Patient_ID'])

	# Create the final dataframe
	expr_regulatory_df = expr_regulatory_df1.join(expr_regulatory_df2)

	# Fill the previously created dataframe with the correct gene expression values, for each candidate regulatory gene and for each TCGA aliquot            
	for gene_sym, dict_value in dict_expr_regulatory.items():
		for tcga_aliq, exp_list in dict_value.items():
			if (len(exp_list) != 0):
				fpkm = exp_list[0]
				# add the expression value in the proper cell of the dataframe, rounding it to a float with maximum 6 decimal numbers
				expr_regulatory_df.set_value(tcga_aliq,gene_sym,round(fpkm,6))
				

	# Add to the dataframe the name of each sample, the tumor code and the patient's ID in correspondence of each TCGA aliquot
	for index, row in expr_df_meta.iterrows():
		aliquot = row['biospecimen__bio__bcr_sample_barcode']
		tumor_tag = row['clinical__admin__disease_code']
		patient_id = row['clinical__shared__patient_id']
		expr_regulatory_df.set_value(aliquot,'Sample_ID',index)
		expr_regulatory_df.set_value(aliquot,'Tumor',tumor_tag)
		expr_regulatory_df.set_value(aliquot,'Patient_ID',patient_id)
		
	# Add a row at the beginning of the dataframe to insert also the Entrez Gene ID of each candidate regulatory gene
	additional_index = ['ENTREZ_GENE_ID']
	expr_regulatory_df0_1 = pd.DataFrame(index = additional_index, columns = [regulatory_genesSYM])
	expr_regulatory_df0_2 = pd.DataFrame(index = additional_index, columns = ['Sample_ID','Tumor','Patient_ID'])
	expr_regulatory_df0 = expr_regulatory_df0_1.join(expr_regulatory_df0_2)

	frames = [expr_regulatory_df0, expr_regulatory_df]
	expr_regulatory_df = pd.concat(frames)

	# Add for each Gene Symbol of the regulatory genes the corresponding Entrez Gene ID in the first row of the dataframe
	for i in regulatory_genesSYM:
		if i == 'PTRF':
			entrez_id = Mapping_df.loc[Mapping_df['GENE_SYMBOL'] == 'CAVIN1', 'ENTREZ_GENE_ID'].iloc[0]
		else:
			entrez_id = Mapping_df.loc[Mapping_df['GENE_SYMBOL'] == i, 'ENTREZ_GENE_ID'].iloc[0]
		expr_regulatory_df.set_value('ENTREZ_GENE_ID',i,entrez_id)

	# Set empty strings for NaN values in the 'ENTREZ_GENE_ID' row
	expr_regulatory_df.set_value('ENTREZ_GENE_ID','Sample_ID',"")
	expr_regulatory_df.set_value('ENTREZ_GENE_ID','Tumor',"")
	expr_regulatory_df.set_value('ENTREZ_GENE_ID','Patient_ID',"")


	# Export the dataframe with the gene expression values for the regulatory genes of our genes of interest for each TCGA aliquot 
	writer = ExcelWriter('./3_TCGA_Data/Gene_Expression/Gene Expression - RegulatoryGenes.xlsx')
	expr_regulatory_df.to_excel(writer,'Sheet1')
	writer.save()
	
	return expr_interest_df, expr_regulatory_df
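This example relies on DataFrame.set_value and get_value, which were removed in pandas 1.0. A minimal sketch of the .at equivalents; the index label and columns below are made up for illustration only.

# Sketch of the replacement accessors; labels are illustrative, not project data.
import pandas as pd

df = pd.DataFrame(index=['TCGA-01'], columns=['Sample_ID', 'Tumor'])
df.at['TCGA-01', 'Sample_ID'] = 'S1'       # was: df.set_value('TCGA-01', 'Sample_ID', 'S1')
sample = df.at['TCGA-01', 'Sample_ID']     # was: df.get_value('TCGA-01', 'Sample_ID')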
コード例 #40
0
def parse_func(path, filename):

    transactions = mt940.parse(path)

    # with open('combined.json', 'w') as json_file:
    #     json.dump(transactions, json_file)

    # with open('combined.json') as f:
    #     d = json.load(f)
    d = transactions
    df = json_normalize(transactions)
    df2 = (pd.concat(
        {i: json_normalize(x)
         for i, x in df.pop('transactions').items()},
        sort=False).reset_index(level=1, drop=True).join(
            df, lsuffix='_in_transactions',
            rsuffix='_if_opening_NA').reset_index(drop=True))
    try:
        df2 = df2[[
            'account_identification', 'date', 'amount.currency',
            'amount.amount', 'status', 'customer_reference',
            'transaction_reference', 'extra_details', 'transaction_details',
            'final_opening_balance.date', 'final_opening_balance.status',
            'final_opening_balance.amount.amount',
            'final_opening_balance.amount.currency', 'entry_date',
            'funds_code', 'guessed_entry_date', 'id', 'available_balance.date',
            'available_balance.status', 'available_balance.amount.amount',
            'available_balance.amount.currency', 'final_closing_balance.date',
            'final_closing_balance.status',
            'final_closing_balance.amount.amount',
            'final_closing_balance.amount.currency', 'sequence_number',
            'statement_number'
        ]]
        df2 = df2.rename(
            columns={
                'account_identification': 'Bank account no.',
                'date': 'Transacton date',
                'amount.currency': 'Amount currency',
                'amount.amount': 'Amount',
                'status': 'Transaction type',
                'customer_reference': 'Transaction reference',
                'transaction_reference': 'Reference no.',
                'extra_details': 'Additional reference',
                'transaction_details': 'Remarks',
                'final_opening_balance.date': 'Opening balance date',
                'final_opening_balance.status': 'Opening balance status',
                'final_opening_balance.amount.amount':
                'Opening balance amount',
                'final_opening_balance.amount.currency':
                'Opening balance currency',
                'entry_date': 'Entry date',
                'funds_code': 'Fund code',
                'guessed_entry_date': 'Addl. Entry date',
                'id': 'ID',
                'available_balance.date': 'Available balance date',
                'available_balance.status': 'Available balance type',
                'available_balance.amount.amount': 'Available balance',
                'available_balance.amount.currency':
                'Available balance currency',
                'final_closing_balance.date': 'Ledger balance date',
                'final_closing_balance.status': 'Ledger balance type',
                'final_closing_balance.amount.amount': 'Ledger balance amount',
                'final_closing_balance.amount.currency':
                'Ledger balance currency',
                'sequence_number': 'Sequence no.',
                'statement_number': 'Statement no.'
            })
    except:
        pass
    #print(df2.head(3))
    #print(df2.shape)
    writer = ExcelWriter(filename)

    df2.to_excel(writer, sheet_name='Transactional_Data', index=False)
    writer.save()

    try:
        d[:] = [item for item in d if not item['transactions']]
        df = json_normalize(d)
        df = df[[
            'account_identification', 'final_opening_balance.date',
            'final_opening_balance.status',
            'final_opening_balance.amount.amount',
            'final_opening_balance.amount.currency',
            'final_closing_balance.date', 'final_closing_balance.status',
            'final_closing_balance.amount.amount',
            'final_closing_balance.amount.currency', 'available_balance.date',
            'available_balance.status', 'available_balance.amount.amount',
            'available_balance.amount.currency', 'sequence_number',
            'statement_number', 'transaction_reference'
        ]]
        df = df.rename(
            columns={
                'account_identification': 'Bank account no.',
                'final_opening_balance.date': 'Opening balance date',
                'final_opening_balance.status': 'Opening balance status',
                'final_opening_balance.amount.amount':
                'Opening balance amount',
                'final_opening_balance.amount.currency':
                'Opening balance currency',
                'final_closing_balance.date': 'Ledger balance date',
                'final_closing_balance.status': 'Ledger balance type',
                'final_closing_balance.amount.amount': 'Ledger balance amount',
                'final_closing_balance.amount.currency':
                'Ledger balance currency',
                'available_balance.date': 'Available balance date',
                'available_balance.status': 'Available balance type',
                'available_balance.amount.amount': 'Available balance',
                'available_balance.amount.currency':
                'Available balance currency',
                'sequence_number': 'Sequence no.',
                'statement_number': 'Statement no.',
                'transaction_reference': 'Transaction reference'
            })
        #         del df['transactions']

        # # print(df.head(3))
        # print(df.shape)

        df.to_excel(writer, sheet_name='Non_Transactional_Data', index=False)
        writer.save()
    except:
        pass
コード例 #41
0
def main():
    """
    Main function to run program
    """

    # Set up logging
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)
    handler = logging.FileHandler(LOGGERLOCATION)
    handler.setLevel(logging.INFO)
    # create a logging format
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    # add the handlers to the logger
    logger.addHandler(handler)

    logger.info('Starting...')

    # I write back to the original dataframe and pandas warns about that, so turning off the warning
    pd.options.mode.chained_assignment = None

    # Import dataframe from original xls
    df = import_xls_to_df(DATAFILENAME, DATASHEETNAME)

    print(len(df))

    logger.info('Raw dataframe length before any processing: ' + repr(len(df)))

    # Add a column for URL pinging response
    add_column(df, 'URL status')

    # Clean the dataframe
    df = clean_data(df, 'Year First Provided')

    # Get a list of rootdomains (i.e. netloc) of URLs
    rootdomainsdf = get_root_domains(df, 'URL')

    # Adds data into df about status of the URL at which software is stored
    url_check = check_url_status(df, 'URL', 'URL status')
    url_df = pd.concat([url_check['URL'], url_check['URL status']],
                       axis=1,
                       keys=['URL', 'URL status'])

    # Count the unique values in columns to get summaries of open/closed/no licence, which university released outputs, where outputs are being stored and in which year outputs were recorded
    open_source_licence = produce_count_and_na(df, 'Open Source?')
    open_source_licence.index = open_source_licence.index.fillna('No response')
    universities = produce_count_and_na(df, 'RO')
    unique_rootdomains = produce_count_and_na(rootdomainsdf, 'rootdomains')
    year_of_return = produce_count(df, 'Year First Provided')
    url_status = produce_count(df, 'URL status')

    # Want this to be sorted in year order rather than in order of largest count
    year_of_return.sort_index(inplace=True)

    # Collate all impact statements into a text file for later word cloud generation
    impact_to_txt(df, 'Impact')

    # Plot results and save charts
    plot_bar_charts(open_source_licence, 'opensource',
                    'Is the output under an open-source licence?', None,
                    'No. of outputs', 0)
    plot_bar_charts(universities, 'universities',
                    'Top 30 universities that register the most outputs', None,
                    'No. of outputs', 30)
    plot_bar_charts(unique_rootdomains, 'rootdomain',
                    '30 most popular domains for storing outputs', None,
                    'No. of outputs', 30)
    plot_bar_charts(year_of_return, 'returnyear',
                    'When was output first registered?', None,
                    'No. of outputs', 0)

    # Write results to an Excel spreadsheet for the sheer hell of it
    writer = ExcelWriter(EXCEL_RESULT_STORE)
    open_source_licence.to_excel(writer, 'opensource')
    universities.to_excel(writer, 'universities')
    unique_rootdomains.to_excel(writer, 'rootdomain')
    year_of_return.to_excel(writer, 'returnyear')
    url_df.to_excel(writer, 'urlstatus')
    url_status.to_excel(writer, 'urlstatus_summ')
    df.to_excel(writer, 'Resulting_df')
    writer.save()
コード例 #42
0
        a = list(decomposition(df1["discounted_price"][j]))
    for i in range(0, 6):
        if (i == 0):
            df1["month_1"][c] = a[i]
        if (i == 1):
            df1["month_2"][c] = a[i]
        if (i == 2):
            df1["month_3"][c] = a[i]
        if (i == 3):
            df1["month_4"][c] = a[i]
        if (i == 4):
            df1["month_5"][c] = a[i]
        if (i == 5):
            df1["month_6"][c] = a[i]
    c = c + 1
print("ho ho ho ho")
df1.insert(9, "cancel", 0)
user_id = pd.Series([])
cancel = pd.Series([])
for k in range(len(df1)):
    if df1["user_id"][k] == '0':
        df1["user_id"][k] = (random.randrange(1, 150, 3))
        df1["cancel"][k] = (random.randrange(0, 3, 1))

df2 = df1.groupby("user_id").sum()
print(df2)

writer = ExcelWriter('new.xls')
df2.to_excel(writer, 'Sheet1')
writer.save()
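Assignments like df1["month_1"][c] = a[i] above use chained indexing, which writes through a temporary and triggers SettingWithCopyWarning (and may not write back at all on a copy). A minimal sketch of the .at form, on a made-up frame:

# Sketch with a hypothetical frame; .at writes each cell in place.
import pandas as pd

df1 = pd.DataFrame({'month_1': [0.0], 'month_2': [0.0]})
a = [10.5, 20.25]
c = 0
df1.at[c, 'month_1'] = a[0]   # instead of df1['month_1'][c] = a[0]
df1.at[c, 'month_2'] = a[1]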
コード例 #43
0
def fix_gene_protein_inconsistencies(config, hgnc_df_filename, log):
    # create a excel spreadsheet with the HGNC and antibody-gene-p map
    log.info('\tstart fixing gene/protein inconsistencies')
    corrected_aa_files_dir = config['protein']['corrected_aa_file_dir']
    writer = ExcelWriter(corrected_aa_files_dir +
                         'antibody-gene-protein-map.xlsx')

    antibody_to_gene_map, antibody_to_protein_map = get_antibody_gene_protein_map(
        config, log)

    # create a dataframe to work with
    log.info('\t\tcreate combined antibody to gene/protein map')
    aa_map = []
    for i in antibody_to_gene_map:
        num_gene_names = len(filter(len, antibody_to_gene_map[i]))
        num_protein_names = len(filter(len, antibody_to_protein_map[i]))
        aa_map.append({
            'composite_element_ref': i,
            'gene_name': antibody_to_gene_map[i],
            'protein_name': antibody_to_protein_map[i],
            'num_genes': num_gene_names,
            'num_proteins': num_protein_names
        })

    data_library = pd.DataFrame(aa_map)

    # --------------------------part 1--------------------------------------
    ## check other potential protein and gene names
    protein_lists = data_library['protein_name'].tolist()
    gene_lists = data_library['gene_name'].tolist()

    protein_names = [item for i in protein_lists for item in i]
    gene_names = [item for i in gene_lists for item in i]

    data_library.loc[:,
                     'other_protein_names'] = data_library['protein_name'].map(
                         lambda x: get_variants_of_name(x[0], protein_names))
    data_library.loc[:, 'other_gene_names'] = data_library['gene_name'].map(
        lambda x: get_variants_of_name(x[0], gene_names))
    data_library.loc[:, 'final_curated_protein'] = data_library['protein_name']

    #--------------------------part 2 ----------------------------------------
    # HGNC validation
    hgnc_df = parse_hgnc(hgnc_df_filename, log)
    hgnc_df.to_excel(writer, 'HGNC_validated_genes')
    writer.save()

    # this is an hack if we find multiple genes
    log.info('\t\tcombine multiple genes in record')
    for idx, row in data_library.iterrows():
        record = row.to_dict()

        all_val_statuses = []
        all_val_genes = []
        additional_notes = ''
        for genelist in record['gene_name']:
            ind_val_status = []
            ind_val_gene = []
            ind_val_notes = []
            for gene in genelist.split():
                val_status, val_gene, additional_notes = hgnc_validation(
                    gene, hgnc_df)
                ind_val_status.append(val_status)
                ind_val_gene.append(val_gene)
                ind_val_notes.append(additional_notes)
            all_val_statuses.append(" ".join(ind_val_status))
            all_val_genes.append(" ".join(ind_val_gene))
            additional_notes = ";".join(list(set(ind_val_notes)))

        data_library.loc[idx, 'HGNC_validation_status'] = all_val_statuses
        data_library.loc[idx, 'final_curated_gene'] = all_val_genes
        additional_notes = additional_notes.strip()
        if additional_notes:
            data_library.loc[idx, 'additional_notes'] = additional_notes

    # -----------------Rank the dataframe----------------------------------#
    # rank the data frame
    log.info('\t\trank records')
    data_library = rank_dataframe(data_library)
    data_library = data_library.sort(['row_rank'], ascending=[1])
    col_order = [
        'composite_element_ref', 'num_genes', 'num_proteins', 'gene_name',
        'protein_name', 'HGNC_validation_status', 'other_protein_names',
        'other_gene_names', 'final_curated_gene', 'final_curated_protein',
        'row_rank', 'notes', 'additional_notes'
    ]
    data_library.to_excel(writer,
                          'antibody-gene-protein-map',
                          index=False,
                          columns=col_order)
    writer.save()
    log.info('\tdone fixing gene/protein inconsistencies')
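Two calls in this snippet are Python 2 / old-pandas idioms: len(filter(len, ...)) raises TypeError on Python 3 because filter returns an iterator, and DataFrame.sort() was removed in favour of sort_values(). A minimal sketch of the replacements, with illustrative data:

# Sketch of the Python 3 / current-pandas equivalents; values are made up.
import pandas as pd

gene_names = ['EGFR', '', 'TP53']
num_gene_names = sum(1 for g in gene_names if g)   # was: len(filter(len, gene_names))

data_library = pd.DataFrame({'row_rank': [2, 1, 3]})
data_library = data_library.sort_values('row_rank', ascending=True)  # was: .sort(['row_rank'], ascending=[1])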
コード例 #44
0
def PostProcess(data_dir='./out', log_dir=None):

    if (log_dir):
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        log_file = open(log_dir + "postProcess.log", "a")
        old_stdout = sys.stdout
        sys.stdout = log_file

    print("Scanning files ...")

    agg_data_df = pd.DataFrame(
        columns=['Name', 'URL', 'Phone', 'Address', 'Category', 'Industry'])
    for root, dirs, files in os.walk(data_dir):
        for name in files:
            file_nature = name.split('_')[0]
            if file_nature == "vendorsOf":
                file_name = os.path.join(root, name)
                print(f"Adding : {name} ..")
                file_df = pd.read_excel(file_name)
                agg_data_df = agg_data_df.append(file_df,
                                                 ignore_index=True,
                                                 sort=False)

    agg_data_df.sort_values('Name', inplace=True)
    agg_data_df.drop_duplicates('URL', inplace=True)
    print(f"Found {len(agg_data_df.index)} distinct product URLs")

    writer = ExcelWriter(data_dir + '/all_vendors.xlsx')
    d = {
        'Name': agg_data_df['Name'],
        'URL': agg_data_df['URL'],
        'Phone': agg_data_df['Phone'],
        'Address': agg_data_df['Address'],
        'Category': agg_data_df['Category'],
        'Industry': agg_data_df['Industry']
    }
    del agg_data_df
    all_vendors_df = pd.DataFrame(
        d, columns=['Name', 'URL', 'Phone', 'Address', 'Category', 'Industry'])

    del d

    try:
        all_vendors_df.to_excel(writer, index=False, encoding='UTF-8')
        writer.save()
        writer.close()
        print(
            f"Found {len(all_vendors_df.index)} distinct vendors!\nRecap file saved as XLSX at {data_dir}/all_vendors.xlsx"
        )
    except Exception as e:
        print(f"Error saving recap file as XLSX")
        writer.close()

    all_vendors_df.to_csv('./out/all_vendors.csv',
                          index=False,
                          sep='|',
                          encoding='UTF-8')
    print(f"Recap file saved as CSV at ./out/all_vendors.csv")

    sample_writer = ExcelWriter(data_dir + '/sample_vendors.xlsx')
    sample_vendors_df = all_vendors_df.iloc[0:500, :]
    try:
        sample_vendors_df.to_excel(sample_writer, index=False, encoding='UTF-8')
        sample_writer.save()
        sample_writer.close()
        print(f"Sample file saved as XLSX at {data_dir}/sample_vendors.xlsx")
    except Exception as e:
        print(f"Error saving sample file as XLSX")
        sample_writer.close()

    sample_vendors_df.to_csv('./out/sample_vendors.csv',
                             index=False,
                             sep='|',
                             encoding='UTF-8')
    print(f"Sample file saved as CSV at ./out/sample_vendors.csv\nDONE")

    if (log_dir):
        sys.stdout = old_stdout
        log_file.close()

    print(f"Found {len(all_vendors_df.index)} distinct vendor URLs")
    print("PostProcess : DONE.")

    return 0
コード例 #45
0
ファイル: statistical.py プロジェクト: Sansiwanghong/HR
    def main(self):
        lateAllDf, late2HDf, late2HomitDf, lateSummary, lateSummaryDep = self.lateAnalyzer(
        )
        lateXlsx = ExcelWriter(self.op + self.thisYear + '年' + self.thisMonth +
                               '月迟到情况统计汇总表.xlsx')
        lateAllDf.columns = [
            '考勤号码', '姓名', '部门', '日期', '时间', '签到时间', '上班时间', '迟到时长', '迟到时长(分钟)'
        ]
        lateAllDf.to_excel(lateXlsx, '个人明细', index=False)
        late2HDf.columns = [
            '考勤号码', '姓名', '部门', '日期', '时间', '签到时间', '上班时间', '迟到时长', '迟到时长(分钟)'
        ]
        late2HDf.to_excel(lateXlsx, '迟到2小时以内', index=False)
        late2HomitDf.columns = [
            '考勤号码', '姓名', '部门', '日期', '时间', '签到时间', '上班时间', '迟到时长', '迟到时长(分钟)'
        ]
        late2HomitDf.to_excel(lateXlsx, '去除免责因素以外迟到', index=False)
        lateSummary.columns = [
            '考勤号码', '姓名', '部门', '去除免责是否月均迟到超过3次', '去除免责迟到次数',
            '是否月均迟到2小时以内超过3次', '迟到2小时以内次数'
        ]
        lateSummary.to_excel(lateXlsx, '个人汇总', index=False)
        lateSummaryDep.columns = [
            '部门', '人数', self.thisMonth + '月工作日数', '应出勤总天数', '去除免责总迟到人次',
            '去除免责月均迟到率', '去除免责月均迟到超过3次人数', '去除免责月均迟到超过3次人数占部门人数比例',
            '去除免责月均迟到超过3次人次', '去除免责月均迟到超过3次人次占部门迟到人次比例', '迟到2小时以内总人次',
            '月均2小时以内迟到率', '月均迟到2小时以内超过3次人数', '月均迟到2小时以内超过3次人数占部门人数比例',
            '月均迟到2小时以内超过3次人次', '月均迟到2小时以内超过3次人次占部门迟到人次比例'
        ]
        lateSummaryDep.to_excel(lateXlsx, '部门汇总', index=False)
        lateXlsx.save()
        lateXlsx.close()
        print('-----------' + self.thisYear + '年' + self.thisMonth +
              '月迟到情况统计汇总表.xlsx' + ' 已生成-----------')

        #        extraAllDf, extraBossDf, extraDepartureDf, personExtraSummary, extraSummaryDf = self.extraAnalyzer()
        #        extraXlsx = ExcelWriter(self.op+self.thisYear+'年'+self.thisMonth+'月加班情况统计汇总表.xlsx')
        #        extraSummaryDf.columns = ['总人数','去除各种假日','去除全月不考勤数值','去除全月不考勤占比','17:05以内打卡数值','17:05以内打卡占比',
        #                                  '17:10以内打卡数值','17:10以内打卡占比','17:15以内打卡数值','17:15以内打卡占比','平均加班30分钟以上数值','平均加班30分钟以上占比','平均加班60分钟以上数值','平均加班60分钟以上占比']
        #        extraSummaryDf.to_excel(extraXlsx,'汇总表', index = False)
        #        extraDepartureDf.columns = ['考勤所属部门','部门人数','加班总时长(分钟)','出勤天数',self.thisMonth+'月工作日数','人均加班时长(分钟)','日均加班时长(分钟)','人均日均加班时长(分钟)','人均考勤率']
        #        extraDepartureDf.to_excel(extraXlsx,'部门汇总', index = False)
        #        extraBossDf.columns = ['考勤号码','姓名','加班总时长(分钟)','出勤天数',self.thisMonth+'月工作日数','日均加班时长(分钟)','人均考勤率']
        #        extraBossDf.to_excel(extraXlsx,'管理序列汇总', index = False)
        #        extraAllDf.columns = ['考勤号码','姓名','部门','日期','时间','签退时间','下班时间','加班时长','加班时长(分钟)']
        #        extraAllDf.to_excel(extraXlsx,'个人明细', index = False)
        #        personExtraSummary.columns = ['姓名','该月平均每天加班时长(分钟)']
        #        personExtraSummary.to_excel(extraXlsx,'员工加班时长排序', index = False)
        #        extraXlsx.save()
        #        extraXlsx.close()
        #        print ('-----------'+self.thisYear+'年'+self.thisMonth+'月加班情况统计汇总表.xlsx'+' 已生成-----------')

    ####################### Work around missAnalyzer fields temporarily missing from wideTable
        missAllDf, missPersionDf, missDepDf = self.missAnalyzer()
        missXlsx = ExcelWriter(self.op + self.thisYear + '年' + self.thisMonth +
                               '月无考勤情况统计汇总表.xlsx')
        missDepDf.columns = [
            '部门', '部门人数', '部门工作日无考勤人次', '人均无考勤人次', '当月累计无考勤天数超过10个工作日的人数'
        ]
        missDepDf.to_excel(missXlsx, '部门汇总', index=False)
        #########bug
        missPersionDf.columns = [
            '考勤号码', '姓名', '部门', '无考勤天数', '当月累计无考勤天数是否超过10个工作日'
        ]
        missPersionDf.to_excel(missXlsx, '个人汇总', index=False)
        missAllDf.columns = [
            '考勤号码', '姓名', '部门', '日期', '时间', '签到时间', '上班时间', '迟到时长', '加班时长(分钟)'
        ]
        missAllDf.to_excel(missXlsx, '考勤原始明细', index=False)
        missXlsx.save()
        missXlsx.close()
        print('-----------' + self.thisYear + '年' + self.thisMonth +
              '月无考勤情况统计汇总表.xlsx' + ' 已生成-----------')
コード例 #46
0
ファイル: base_functions.py プロジェクト: popo9192/NBA
def saveToExcel(df,filename,tab):
    writer = ExcelWriter(filename)
    df.to_excel(writer,tab)
    writer.save()
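A hypothetical call to the helper above; the DataFrame, file name and tab name are placeholders, not data from the project.

# Illustrative usage only; assumes the imports used by saveToExcel are in scope.
import pandas as pd

stats = pd.DataFrame({'player': ['A', 'B'], 'pts': [30, 22]})
saveToExcel(stats, 'nba_stats.xlsx', 'Sheet1')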
コード例 #47
0
def write_to_excel(exl_list, cols):

    df1 = pd.DataFrame(exl_list, columns=cols)
    writer = ExcelWriter('static/download/URLLookupService.xlsx')
    df1.to_excel(writer, 'URLLookUpService', index=False)
    writer.save()
コード例 #48
0
def create_excel(dt,name):
    writer = ExcelWriter(name)  #creates an instance to write data
    dt.to_excel(writer, sheet_name='Sheet1')
    writer.save()
コード例 #49
0
    # identify 50 metro areas in our list
    df1['in50'] = df1.apply(metro_matching, axis=1)

    # Only retain rows for 50 metro areas in our list
    df1 = df1[df1['in50']==1]

    # create a column with names from our list for consistency
    df2 = df1.copy()
    df2['Metro name'] = df1['metro_name'].apply(std_name)

    # drop the unnecessary columns and bring the last column first
    df2.drop(columns=['Place','metro_name', 'state_name', 'in50'], inplace=True)

    cols = list(df2.columns)
    cols = [cols[-1]] + cols[:-1]
    df2 = df2[cols]

    return df2

# create individual dataframes and concatenate them along the index
df_AL = dfCreator(HPI_AL)
df_MZ = dfCreator(HPI_MZ)

df_joined = pd.concat([df_AL, df_MZ], ignore_index=True)

# write it in an Excel file
w = ExcelWriter('House Price Index.xlsx')
df_joined.to_excel(w)
w.save()
コード例 #50
0
ファイル: main.py プロジェクト: sofiamuneramd/Dia_25_10abril
copia1 = pd.DataFrame({
    'Nombre': nombres,
    'Primer Apellido': apellido,
    'Segundo Apellido': ['Medina', 'Montoya', 'Peña']
})

# We add the following line so that the columns end up in a specific order

copia1 = copia1[['Nombre', 'Primer Apellido', 'Segundo Apellido']]

# Now we create and save the file. index=False prevents an extra numbering column from being added

archivo = ExcelWriter('copia1.xls')
copia1.to_excel(archivo, 'Hoja Copia', index=False)
archivo.save()
archivo.close()

# EXERCISE 2

File = pd.ExcelFile('Libro2.xls')
hoja1 = File.parse('Hoja1')
paises = hoja1['Paises'].values

copia1 = pd.DataFrame({
    'Pais':
    paises,
    'Capital': ['Ottawa', 'Ciudad de México', 'Bogotá', 'Madrid', 'Moscú']
})

copia1 = copia1[['Pais', 'Capital']]
コード例 #51
0
ファイル: Report_1.py プロジェクト: patilprasanth/Allways-inc
def main():
    # Here we are reading the raw data csv file
    re_style = ReportStyles()
    csv_file = pd.read_csv('google.csv')
    # Declaring list variables
    month = []
    year = []
    Stock = []
    # assigning values to variable
    stock = csv_file['Stock']
    # looping to extract months and year from date
    for index in csv_file.index:
        #assiging all the date to a variable
        dates = pd.DatetimeIndex(csv_file['date'])
        #appending years to a list
        year.append(dates[index].year)
        #appending months to a list
        month.append(dates[index].month)
        #appending stock to a list
        Stock.append(stock[index])
    #creating a data dictonary
    data = {'Stock': Stock, 'Month': month, 'Year': year}
    #creating a pandas dataframe from dict
    frame = pd.DataFrame(data)
    #Merging two data frames into one common data frame
    csv_file = pd.merge(csv_file, frame, on='Stock')
    #pivoting the data
    piv = csv_file.pivot_table(['Open', 'High'],
                               rows='Month',
                               cols='Year',
                               margins=True,
                               aggfunc='count')
    #writing pivot table to an excel
    piv.to_excel('temp.xls')
    book = open_workbook('temp.xls')
    #reading the first sheet from excel
    sheet0 = book.sheet_by_index(0)
    col_cnt = sheet0.ncols
    row_cnt = sheet0.nrows
    pd1 = pd.read_excel(io='temp.xls', sheetname='Sheet1')
    pd2 = pd.read_excel(io='temp.xls', sheetname='Sheet1')
    writer = ExcelWriter('temp1.xls')
    pd1.to_excel(writer, 'Sheet1', startcol=0, startrow=2)
    pd2.to_excel(writer, 'Sheet1', startcol=(col_cnt + 2), startrow=2)
    writer.save()

    book = open_workbook('temp1.xls')
    #reading the first sheet from excel
    sheet0 = book.sheet_by_index(0)
    col_cnt1 = sheet0.ncols
    row_cnt1 = sheet0.nrows

    currency = XFStyle()
    currency.borders = re_style.borders_light()
    currency.alignment = re_style.align_hor_right()
    currency.num_format_str = "[$$-409]#,##0.00;-[$$-409]#,##0.00"

    headings = XFStyle()
    headings.borders = re_style.borders_light()
    headings.alignment = re_style.align_hor_center()
    headings.font = re_style.text_bold()

    no_borders = XFStyle()
    no_borders.borders = re_style.no_borders()

    wb = Workbook()
    ws = wb.add_sheet('Sample_Report', cell_overwrite_ok=True)
    for row in range(row_cnt1):
        for col in range(col_cnt1):
            val = sheet0.cell_value(row, col)
            if row < 2:
                ws.row(row).write(col, val, no_borders)
            elif col == (col_cnt + 2):
                ws.row(row).write(col, val, headings)
            # elif col > col_cnt and col < (col_cnt+3):
            #     ws.row(row).write(col, val, no_borders)
            elif row > 4 and col > 0:
                ws.row(row).write(col, val, currency)
            elif row > 4 and col > (col_cnt + 3):
                ws.row(row).write(col, val, currency)
            else:
                ws.row(row).write(col, val, headings)
            wb.save('Report-1.xls')
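The pivot above uses the long-removed rows=/cols= keywords of pivot_table. A minimal sketch of the same pivot with the current signature (index/columns), on a made-up frame:

# Sketch with toy data; index/columns replace the removed rows/cols keywords.
import pandas as pd

csv_file = pd.DataFrame({'Open': [1.0, 2.0, 3.0], 'High': [1.5, 2.5, 3.5],
                         'Month': [1, 1, 2], 'Year': [2020, 2021, 2020]})
piv = csv_file.pivot_table(values=['Open', 'High'], index='Month',
                           columns='Year', margins=True, aggfunc='count')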
コード例 #52
0
def __process_zhejiang_IMEI(callFailData, path, file_pre, cs_ps):
    model_list_fp = open(os.path.join('.', 'config', '云诊断内销浙江统计机型列表.txt'), 'r')
    modelList = []
    for model in model_list_fp.readlines():
        modelList.append(model.strip())

    xls_fileName = os.path.join(path,
                                file_pre + '_数据分析结果_浙江IMEI' + cs_ps + '.xls')
    workbook = xlsxwriter.Workbook(xls_fileName)

    #--- Filter and compare each model in the list;
    #    if a match is found, write it to the Excel sheet
    list_result = []
    for model in modelList:
        model0 = model.split('_')[0]
        model1 = model.split('_')[1]

        worksheet = workbook.add_worksheet(model)
        worksheet.set_column('A:A', 20)

        before = str(callFailData.shape[0])
        callFailData_after = callFailData[callFailData['外部机型'] == model0]
        after = str(callFailData_after.shape[0])

        print('开始过滤' + model + '...' + after + '/' + before)

        # Get all IMEI values from the dataframe
        imeiList_a = []
        for imei in callFailData_after['imei'].tolist():
            imeiList_a.append(str(imei).strip())

        # Get the Zhejiang IMEI list from the file
        imeiList_b = []
        fileName = os.path.join('.', 'zhejiang_imei', model1 + '.txt')
        imeiFile_fp = open(fileName, 'r')
        imei_zhejiang = imeiFile_fp.readlines()
        for imei in imei_zhejiang:
            imeiList_b.append(imei.strip())

        # Get the intersection of the Zhejiang IMEI list and the dataframe IMEIs
        IMEI_intersection = list(set(imeiList_a).intersection(set(imeiList_b)))
        #print('a='+str(len(imeiList_a))+',b='+str(len(imeiList_b))+',intersection='+str(len(IMEI_intersection)))

        # Sort by counts in the dataframe and write the Zhejiang IMEIs to Excel
        callFailData_IMEI = callFailData_after['imei'].value_counts()
        allIMEI = callFailData_IMEI.index.tolist()

        row_i = 0
        for imei_i in range(len(allIMEI)):
            for imei_filtered in IMEI_intersection:
                if (imei_filtered == allIMEI[imei_i]):

                    worksheet.write(row_i, 0, imei_filtered)
                    worksheet.write(row_i, 1, callFailData_IMEI.values[imei_i])
                    list_result.append(
                        (imei_filtered, callFailData_IMEI.values[imei_i]), )
                    row_i += 1

    #--- Compute the top entries over all filtered Zhejiang IMEIs
    print('output all...')
    worksheet = workbook.add_worksheet('all')
    worksheet.set_column('A:A', 20)
    mylist = sorted(list_result, key=lambda t: t[1], reverse=True)
    for i in range(len(mylist)):
        worksheet.write(i, 0, mylist[i][0])
        worksheet.write(i, 1, mylist[i][1])
    workbook.close()

    length_mylist = 0
    if (len(mylist) < 1):
        callFailData_internal = pd.DataFrame(columns=callFailData.columns)
    else:
        if (len(mylist) < 10):
            length_mylist = len(mylist)
        else:
            length_mylist = 10

        callFailDataList = []
        for i in range(length_mylist):
            callFailData_internal = callFailData[callFailData['imei'] ==
                                                 mylist[i][0]]
            callFailDataList.append(callFailData_internal)

        callFailData_internal = pd.DataFrame(columns=callFailData.columns)
        for i in range(1, len(callFailDataList)):
            callFailData_internal = callFailData_internal.append(
                callFailDataList[i], ignore_index=True)

    xls_fileName1 = os.path.join(
        path, file_pre + '_数据分析结果_浙江IMEI详细信息' + cs_ps + '.xlsx')
    writer = ExcelWriter(xls_fileName1)
    callFailData_internal.to_excel(writer, 'data')
    writer.save()
コード例 #53
0
# We can perform basic operations on the series

tamaño2=tamaño1+5

# Now we import ExcelWriter so we can create a new workbook and sheet

from pandas import ExcelWriter

# Save both files

file=ExcelWriter('Copia1.xls')
tamaño0.to_excel(file,'Hoja1')
tamaño1.to_excel(file,'Hoja2')
tamaño2.to_excel(file,'Hoja2')
file.save()



File=pd.ExcelFile('Libro2.xls')

hoja1=File.parse('Hoja1')

paises=hoja1['Paises'].values
print(paises)

# Assign a variable to each element of the column

a=paises[0]
b=paises[1]
c=paises[2]
コード例 #54
0
    buggroup=bug.getElementsByTagName('BugGroup')[0].firstChild.nodeValue
    bugcode=bug.getElementsByTagName('BugCode')[0].firstChild.nodeValue
    bugmessage=bug.getElementsByTagName('BugMessage')[0].firstChild.nodeValue
    buildid=bug.getElementsByTagName('BugTrace')[0].getElementsByTagName('BuildId')[0].firstChild.nodeValue
    assessmentreportfile=bug.getElementsByTagName('BugTrace')[0].getElementsByTagName('AssessmentReportFile')[0].firstChild.nodeValue
    for buglocation in buglocations:
        locations=buglocation.getElementsByTagName('Location')
        for location in locations:
            loc_id=location.getAttribute('id')
            is_primary=location.getAttribute('primary')
            sourcefile=location.getElementsByTagName('SourceFile')[0].firstChild.nodeValue
            startline=location.getElementsByTagName('StartLine')[0].firstChild.nodeValue
            endline=location.getElementsByTagName('EndLine')[0].firstChild.nodeValue
            parasoftbugs.set_value(rowcount,'SourceFile',sourcefile)
            parasoftbugs.set_value(rowcount,'Bug Group',buggroup)
            parasoftbugs.set_value(rowcount,'Bug Code',bugcode)
            parasoftbugs.set_value(rowcount,'Bug Message',bugmessage)
            parasoftbugs.set_value(rowcount,'Build ID',buildid)
            parasoftbugs.set_value(rowcount,'AssessmentReportFile',assessmentreportfile)
            parasoftbugs.set_value(rowcount,'Location ID',loc_id)
            parasoftbugs.set_value(rowcount,'Primary',is_primary)
            parasoftbugs.set_value(rowcount,'StartLine',startline)
            parasoftbugs.set_value(rowcount,'EndLine',endline)
            # if location.hasElement('EndLine'):
            #     endline=location.getElementsByTagName('EndLine')[0].firstChild.nodeValue
            #     parasoftbugs.set_value(rowcount,'EndLine',endline)
            rowcount+=1
xclwrite=ExcelWriter('sql_cpp_bugs.xlsx')
parasoftbugs.to_excel(xclwrite,'Sheet 1',index=True)
xclwrite.save()
コード例 #55
0
def generar_excel(dfautos, nombre):
    nombre_final = nombre + '.xlsx'
    writer = ExcelWriter(nombre_final)
    dfautos.to_excel(writer, 'muestras')
    writer.save()
    print('ok')
コード例 #56
0
def generar_df_excel(df, nombre_archivo):
    nombre_final = nombre_archivo + '.xlsx'
    writer = ExcelWriter(nombre_final)
    df.to_excel(writer, 'Muestras')
    writer.save()
    print('OK')
コード例 #57
0
def func_tt_vardescrible(train, test, train_cols, save_path, tag,
                         file_tag: str):
    path_ = save_path
    from pandas import ExcelWriter
    writer = ExcelWriter(path_ + '_train_test_compare_%s.xlsx' % file_tag)
    train_i = train.copy()
    test_i = test.copy()
    varname_list = []
    varks_list = []
    ks_j_ = []
    ks_i_ = []
    iv_i_ = []
    iv_j_ = []
    check = []
    group = []
    psi_all = []
    num = 0
    for i in train_cols:
        print('turn to ', i)
        if i in train.columns and i != 'intercept':
            ks_i, iv_i, df_gp1 = cal_ks_tt(train_i, i, tag)
            ks_j, iv_j, df_gp2 = cal_ks_tt(test_i, i, tag)
            varname_list.append(i[:-4])
            varks_list.append(abs(ks_i - ks_j))
            ks_j_.append(ks_j)
            ks_i_.append(ks_i)
            iv_i_.append(iv_i)
            iv_j_.append(iv_j)
            group.append(df_gp1.shape[0])
            df_gp1 = df_gp1.reset_index()
            df_gp2 = df_gp2.reset_index()
            df_gp1.index = df_gp1[i]
            df_gp2.index = df_gp2[i]
            df_describle = pd.concat([df_gp1, df_gp2],
                                     axis=1,
                                     keys=['TRAIN', 'CROSS'],
                                     sort=False)
            df_describle = df_describle.reset_index(drop=True)
            df_describle['PSI'] = (
                df_describle[('TRAIN', 'pct_bin')] -
                df_describle[('CROSS', 'pct_bin')]) * np.log(df_describle[
                    ('TRAIN', 'pct_bin')] / df_describle[('CROSS', 'pct_bin')])
            psi = sum([ii for ii in df_describle['PSI'] if not pd.isnull(ii)])
            psi_all.append(psi)
            df_describle = df_describle.reset_index(drop=True)
            # df_describle = df_describle.sort_values(('TRAIN', 'Woe'))

            # The test split can turn out non-monotonic in WOE, so add a check for it.
            # ---------------- monotonicity check start ----------------
            test_woe = df_describle['CROSS']['Woe'].tolist()
            if pd.Series(test_woe).is_monotonic_decreasing or pd.Series(
                    test_woe).is_monotonic_increasing:
                check.append(0)
            else:
                check.append(1)
            # ---------------- monotonicity check end ----------------
            df_describle.to_excel(writer, 'var_details', startrow=num)
            num += len(df_describle) + 4
    test_ks = pd.DataFrame({
        'var': varname_list,
        'ks_train': ks_i_,
        'ks_test': ks_j_,
        'ks_dif': varks_list,
        'iv_train': iv_i_,
        'iv_test': iv_j_,
        'check': check,
        'group': group,
        'PSI': psi_all
    })
    ks_sort = test_ks.sort_values('ks_test', ascending=False)[[
        'var', 'iv_train', 'iv_test', 'ks_train', 'ks_test', 'ks_dif', 'group',
        'check', 'PSI'
    ]]
    ks_sort.to_excel(writer, 'summary', startrow=0)
    writer.save()
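The PSI column computed in #57 is the standard population stability index: for each bin, (p_train − p_cross) · ln(p_train / p_cross), summed while skipping NaN terms. A standalone sketch of the same formula (the helper name and the two per-bin share Series are hypothetical, not taken from the original):

import numpy as np
import pandas as pd

def population_stability_index(pct_train, pct_cross):
    # Per-bin contribution; NaN terms (e.g. empty bins) are dropped,
    # mirroring the `if not pd.isnull(ii)` filter above.
    terms = (pct_train - pct_cross) * np.log(pct_train / pct_cross)
    return float(terms.dropna().sum())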
コード例 #58
0
def f(v):
    if v is None:
        raise PreventUpdate
    print(v)
    # df contains the results of the Minervini screen (all stocks that passed the check)
    df = get_stocks(filePath1)
    # df2 holds the 'previous' table from the last run
    df2 = pd.read_sql_table('previous',
                            'postgresql://*****:*****@localhost/test')

    date_added = dt.date.today()
    #print("Neuer teil - 1")
    #print(df)
    #print("Neuer Teil - 2")
    #print(df2)

    #temp1 = pd.merge(df, df2, left_on='Stock', right_on='Stock')
    temp = pd.merge(df,
                    df2,
                    on='Stock',
                    how='left',
                    suffixes=('_left', '_right'))
    print("Temp 2:")
    print(temp)

    newFile = os.path.dirname(filePath2) + "/Temp.xlsx"
    print(filePath2)
    print(newFile)
    writer = ExcelWriter(newFile)
    temp.to_excel(writer, "Sheet1")
    writer.save()

    exportListNew = pd.DataFrame(columns=['Stock', "Date added"])

    #for i in temp.index:

    #if temp["50 Day MA_left"][i] == temp["50 Day MA_right"][i]:
    #    print(temp["Stock"][i] + " ok")
    #elif temp["50 Day MA_left"][i] != temp["50 Day MA_right"][i]:
    #    print(temp["Stock"][i] + " unequal")
    #    print(temp["50 Day MA_right"][i])
    #    print(temp["50 Day MA_left"][i])
    #    if math.isnan(temp["50 Day MA_right"][i]):
    #        exportListNew = exportListNew.append({'Stock': temp["Stock"][i], "Date added": date_added}, ignore_index=True)
    #    #if temp["50 Day MA_right"][i] == "nan":
    #        print(temp["Stock"][i] + " is right empty")
    #add todays date in "date added"
    #    elif math.isnan(temp["50 Day MA_left"][i]):
    #        print(temp["Stock"][i] + " is left empty")

    print(exportListNew)
    #compare df with existing db table previousrun (read it from db and save it as df2)
    #if stock from df exists in previousrun -> ok
    #if stock from df does not exist in previousrun -> new stock - date_added = today
    #if stock from previousrun is not part of new df -> stock no longer valid -> needs to be deleted from db

    if exportListNew.empty:
        print("Keine Änderungen seit gestern.")
        nochange = {
            'Stock': ['Keine Änderung seit '],
            'Date added': ['1900-01-01']
        }
        nochangelist = pd.DataFrame(nochange)
        table2 = dbc.Table.from_dataframe(nochangelist,
                                          striped=True,
                                          bordered=True,
                                          responsive=True,
                                          hover=True)
    else:
        table2 = dbc.Table.from_dataframe(exportListNew,
                                          striped=True,
                                          bordered=True,
                                          responsive=True,
                                          hover=True)

    table1 = dbc.Table.from_dataframe(df,
                                      striped=True,
                                      bordered=True,
                                      responsive=True,
                                      hover=True)
    #print(df)

    #clear db table before saving the latest stocks
    try:
        db.session.query(Previous).delete()
        db.session.commit()
        print("Clear db committed")
    except:
        db.session.rollback()
        print("DB rollback scenario")
    ##save to db
    pg = pd.DataFrame(df)
    pg.to_sql("previous", con=db.engine, if_exists='replace', index=False)
    ## end save to db

    newFile = os.path.dirname(filePath2) + "/ScreenOutput.xlsx"
    print(filePath2)
    print(newFile)
    writer = ExcelWriter(newFile)
    df.to_excel(writer, "Sheet1")
    writer.save()

    return dbc.Row(children=[dbc.Card(table2), dbc.Card(table1)])
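The commented-out loop and the notes near the end of #58 describe the intended diff: any stock present in `df` but missing from the previous run should land in `exportListNew` with today's date. A minimal sketch of that logic using the left merge already computed in the function; the `'50 Day MA_right'` column name is an assumption derived from the merge suffixes:

# Rows whose right-hand merge columns are NaN were absent from the previous run.
new_mask = temp['50 Day MA_right'].isna()
exportListNew = pd.DataFrame({
    'Stock': temp.loc[new_mask, 'Stock'].values,
    'Date added': date_added,
})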
コード例 #59
0
portfolios = ['Warrant Trading', 'CBBC Trading']
portfolio_data = {
    portfolio: make_portfolio_data(portfolio)
    for portfolio in portfolios
}

# In[ ]:

excel_writer = ExcelWriter(output_file_path)

for portfolio, (pivot, pivot_to_display, plot_pivot) in portfolio_data.items():
    pivot.to_excel(excel_writer, '{} pivot'.format(portfolio))
    # Write the display pivot to its own sheet so it does not overwrite the raw pivot above.
    pivot_to_display.to_excel(excel_writer, '{} pivot_to_display'.format(portfolio))
    plot_pivot.to_excel(excel_writer, '{} plot_pivot'.format(portfolio))

excel_writer.save()

# In[ ]:

with sns.axes_style('dark', {'font.family': ['SimHei'], 'axes.grid': False}):
    fig, axes = plt.subplots(ncols=2)
    fig.set_figheight(5)
    fig.set_figwidth(15)

    for portfolio, ax in zip(portfolios, axes):

        _, _, plot_pivot = portfolio_data[portfolio]

        sns.barplot(x='Broker', y='Turnover', data=plot_pivot, ax=ax)
        ax.set_yticklabels([])
        ax.set_ylim(100)
コード例 #60
0
ファイル: excel_io.py プロジェクト: lijiansong/lang
def extract_into_excel():
    '''
    Dependencies: pandas plus an Excel writer engine (openpyxl or xlsxwriter).
    On Ubuntu with Python 3, for example:
    apt install python3-pandas
    apt install python3-xlsxwriter
    '''
    # TODO: this coding style is very ugly.
    prefix = os.getcwd().split("/")[-2]
    end2end_fps_file = prefix + '-end2end_fps.txt'
    hardware_fps_file = prefix + '-hardware_fps.txt'
    total_exe_time_file = prefix + '-total-exe-time.txt'
    final_acc_file = prefix + '-global-acc.txt'

    prepare_input_time_file = prefix + '-prepare_input.txt'
    copyin_time_file = prefix + '-copyin_time.txt'
    execution_time_file = prefix + '-execution_time.txt'
    copyout_time_file = prefix + '-copyout_time.txt'
    post_process_time_file = prefix + '-post_process_time.txt'

    sparsity_list = []
    batch_size_list = []
    data_parallel_list = []
    model_parallel_list = []
    thread_num_list = []
    fifo_size_list = []
    end2end_fps_list = []
    hardware_fps_list = []
    total_exe_time_list = []
    final_acc_list = []

    prepare_input_time_list = []
    copyin_time_list = []
    execution_time_list = []
    copyout_time_list = []
    post_process_time_list = []

    # end to end fps
    file_reader = open(end2end_fps_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(text_lines)
        for line in text_lines:
            sparsity, batch_size, data_parallel, model_parallel, thread_num, end2end_fps = line.split(
                ",")
            sparsity_list.append(float(sparsity))
            batch_size_list.append(int(batch_size))
            data_parallel_list.append(int(data_parallel))
            model_parallel_list.append(int(model_parallel))
            thread_num_list.append(int(thread_num))
            fifo_size_list.append(2)
            end2end_fps_list.append(float(end2end_fps))
    finally:
        file_reader.close()

    # hardware fps
    file_reader = open(hardware_fps_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(type(text_lines))
        #print(text_lines)
        for line in text_lines:
            _, hardware_fps = line.split(",")
            hardware_fps_list.append(float(hardware_fps))
    finally:
        file_reader.close()

    # total exe time
    file_reader = open(total_exe_time_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(type(text_lines))
        #print(text_lines)
        for line in text_lines:
            _, total_exe_time = line.split(",")
            total_exe_time_list.append(float(total_exe_time))
    finally:
        file_reader.close()

    # final top-1 accuracy
    file_reader = open(final_acc_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(type(text_lines))
        #print(text_lines)
        for line in text_lines:
            _, final_acc = line.split(",")
            final_acc_list.append(float(final_acc))
    finally:
        file_reader.close()

    # prepare input time
    file_reader = open(prepare_input_time_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(type(text_lines))
        #print(text_lines)
        for line in text_lines:
            _, _, _, _, _, prepare_input_time = line.split(",")
            prepare_input_time_list.append(float(prepare_input_time))
    finally:
        file_reader.close()

    # copyin time
    file_reader = open(copyin_time_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(type(text_lines))
        #print(text_lines)
        for line in text_lines:
            _, _, _, _, _, copyin_time = line.split(",")
            copyin_time_list.append(float(copyin_time))
    finally:
        file_reader.close()

    # execution time
    file_reader = open(execution_time_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(type(text_lines))
        #print(text_lines)
        for line in text_lines:
            _, _, _, _, _, execution_time = line.split(",")
            execution_time_list.append(float(execution_time))
    finally:
        file_reader.close()

    # copyout time
    file_reader = open(copyout_time_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(type(text_lines))
        #print(text_lines)
        for line in text_lines:
            _, _, _, _, _, copyout_time = line.split(",")
            copyout_time_list.append(float(copyout_time))
    finally:
        file_reader.close()

    # post process time
    file_reader = open(post_process_time_file, 'r')
    try:
        text_lines = file_reader.readlines()
        #print(type(text_lines))
        #print(text_lines)
        for line in text_lines:
            _, _, _, _, _, post_process_time = line.split(",")
            post_process_time_list.append(float(post_process_time))
    finally:
        file_reader.close()

    assert len(batch_size_list) == len(data_parallel_list) and \
            len(data_parallel_list) == len(model_parallel_list) and \
            len(model_parallel_list) == len(thread_num_list) and \
            len(thread_num_list) == len(fifo_size_list) and \
            len(fifo_size_list) == len(end2end_fps_list) and \
            len(end2end_fps_list) == len(hardware_fps_list) and \
            len(hardware_fps_list) == len(total_exe_time_list) and \
            len(total_exe_time_list) == len(prepare_input_time_list) and \
            len(prepare_input_time_list) == len(copyin_time_list) and \
            len(copyin_time_list) == len(execution_time_list) and \
            len(execution_time_list) == len(copyout_time_list) and \
            len(copyout_time_list) == len(post_process_time_list), \
            " Error! Must have same records length!"

    ordered_dict = collections.OrderedDict()
    ordered_dict['sparsity'] = sparsity_list
    ordered_dict['batch size'] = batch_size_list
    ordered_dict['data parallel'] = data_parallel_list
    ordered_dict['model parallel'] = model_parallel_list
    ordered_dict['thread num'] = thread_num_list
    ordered_dict['fifo size'] = fifo_size_list
    ordered_dict['End to end FPS'] = end2end_fps_list
    ordered_dict['Hardware FPS'] = hardware_fps_list
    ordered_dict['Total execution time(ms)'] = total_exe_time_list
    ordered_dict['Top-1 accuracy'] = final_acc_list
    ordered_dict['Prepare input time(ms)'] = prepare_input_time_list
    ordered_dict['Copyin time(ms)'] = copyin_time_list
    ordered_dict['Execution time(ms)'] = execution_time_list
    ordered_dict['Copyout time(ms)'] = copyout_time_list
    ordered_dict['Post process time(ms)'] = post_process_time_list
    df = pd.DataFrame(ordered_dict)
    excel_file_name = prefix + '.xlsx'
    writer = ExcelWriter(excel_file_name)
    df.to_excel(writer, 'Sheet1', index=False)
    writer.save()
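The TODO near the top of #60 already flags the repetition: after the first file, every block reads a text file and keeps only the last comma-separated field as a float. A hedged sketch of a helper that would collapse those blocks (the helper name is hypothetical; the first, multi-field file would still need its own parse):

def read_last_field(path):
    '''Return the last comma-separated field of every line as a float.'''
    values = []
    with open(path, 'r') as file_reader:
        for line in file_reader:
            values.append(float(line.split(",")[-1]))
    return values

# e.g. hardware_fps_list = read_last_field(hardware_fps_file)
#      copyin_time_list  = read_last_field(copyin_time_file)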