def DFtoExcel(df, FolderName, FileName):
    write_df = df.loc[:, ["FileName", "hyperlink", "Sheet Name"]]
    # Path Cell_Search_By_Key
    MainFolder = "C:\\Cell_Search_By_Key"
    FolderPath = os.path.join(MainFolder, FolderName)
    if not os.path.exists(FolderPath):
        os.makedirs(FolderPath)
    os.chdir(FolderPath)
    ExcelName = "%s.xlsx" % FileName
    writer = ExcelWriter(ExcelName)
    write_df.to_excel(writer, "Result", index=False)
    writer.save()
    # turn path into hyperlink
    Excel_Path = os.path.join(FolderPath, ExcelName)
    wb = Workbook(Excel_Path)
    # wb = Workbook.caller()
    checkArr = Range("B2").vertical.value
    i = 2
    for check in checkArr:
        RangeName = "B%d" % (i)
        displayRange = "A%d" % (i)
        address = Range(RangeName).value
        display_name = Range(displayRange).value
        i += 1
        try:
            Range(RangeName).add_hyperlink(address, text_to_display=address)
        except:
            pass
    wb.save()
    wb.close()
    return "FINISH"
def generate_report(title, description):
    """Generate Excel 1997 file from query.

    :param title: Query title.
    :param description: Query description.
    :return: Response with Excel 1997 attachment.
    """
    df = load_data_frame(request)
    # Limit the columns to the maximum allowed in Excel 97.
    max_length = 255
    index_len = len(df.index.names)
    lim_df = df.drop(df.columns[max_length - index_len - 1:len(df.columns) - 1],
                     axis=1)
    extension = 'xls'
    engine = 'xlwt'
    encoding = 'utf-8'
    content_type = 'application/vnd.ms-excel'
    # Add content and return response
    f = NamedTemporaryFile(suffix=extension)
    ew = ExcelWriter(f.name, engine=engine, encoding=encoding)
    #print lim_df.to_string()
    #print f.name
    lim_df.to_excel(ew)
    ew.save()
    #shutil.copyfile(f.name, 'manuel.xls')
    show_legend = request.REQUEST.get('show_legend', '')
    table_description = request.REQUEST.get('table_description', '')
    add_header_and_footer(f.name, title, description, show_legend,
                          table_description)
    title = title.strip().encode("UTF-8").replace(" ", '_')
    if len(title) > max_length_filename:
        title = title[:max_length_filename]
    filename = '%s.%s' % (title, extension)
    # Setup response
    data = f.read()
    response = HttpResponse(data)
    response["Content-Type"] = content_type
    response["Content-status_code"] = 200
    response['Content-Transfer-Encoding'] = 'binary'
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    return response
def dataIO(self, args):
    """ IO data for possible extension """
    writer = ExcelWriter("{}.xlsx".format(args.logFile), engine='xlsxwriter')
    reportDf = pd.DataFrame()
    reportDf.to_excel(writer, sheet_name="Reports")
    contentDf = pd.DataFrame()
    contentDf.to_excel(writer, sheet_name="Contents")
    contentSheet = writer.sheets["Contents"]
    contentSheet.write_string(xl_rowcol_to_cell(self.sheetLinkRow, 0),
                              "link list for all choices and sub refines".format(args.logFile))
    self.sheetLinkRow += 1
    for dfname in ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']:
        if dfname in self._rawdf.keys():
            print("--save raw data for {}".format(dfname))
            self._rawdf[dfname].to_excel(writer, "{}".format(dfname))
            link_format = writer.book.add_format({'color': 'blue', 'underline': 1})
            contentSheet.write_url(xl_rowcol_to_cell(self.sheetLinkRow, 0),
                                   "internal:{}!A1".format(dfname),
                                   link_format, dfname)
            self.sheetLinkRow += 1
        if dfname in self._rawdf.keys() and dfname in ['2', '3', '5', '6', '8', '9', '11', '12']:
            self.refine(args, writer, dfname)
    # Close the Pandas Excel writer and output the Excel file.
    writer.save()
def data_total(DocName, HistoryPath, SavePath):
    files = os.listdir(HistoryPath)
    TotalData = pd.DataFrame()
    for file in files:
        historyfile = os.path.join(HistoryPath, file)
        try:
            HistoryBook = pd.ExcelFile(historyfile)
            HistorySheet = HistoryBook.parse('Sheet1', skiprows=0, index=None)
            TotalData = TotalData.append(HistorySheet)
        except IOError:
            print "Cannot read " + str(historyfile)
    TotalData.dropna(subset=['ProductID'], inplace=True)
    TotalData.drop_duplicates(inplace=True)
    filename = DocName + '.xlsx'
    filename = os.path.join(SavePath, filename)
    writer = ExcelWriter(filename)
    TotalData.to_excel(writer, 'Sheet1', index=False)
    writer.save()
    TotalData.to_csv(os.path.join(SavePath, DocName + '.txt'),
                     sep=';', index=False, encoding='utf-8')
def save_table(self, directory=None, filename=None, table_format=None):
    '''
    Saves the table to some format
    '''
    now = datetime.now()
    if table_format is None:
        if filename is not None:
            extension = filename[-4:]
            if extension == '.xls':
                table_format = 'xls'
            elif extension == '.csv':
                table_format = 'csv'
        else:
            table_format = 'xls'

    if directory is None:
        directory = "."
    if filename is None:
        filename = 'Aggregates_%s.%s' % (now.strftime('%d-%m-%Y'), table_format)

    fname = os.path.join(directory, filename)

    try:
        df = self.aggr_frame
        if table_format == "xls":
            writer = ExcelWriter(str(fname))
            df.to_excel(writer, "aggregates", index=False, header=True)
            descr = self.create_description()
            descr.to_excel(writer, "description", index=False, header=False)
            writer.save()
        elif table_format == "csv":
            df.to_csv(fname, "aggregates", index=False, header=True)
    except Exception, e:
        raise Exception("Aggregates: Error saving file", str(e))
def build_aggregates():
    writer = None
    years = range(2006, 2010)
    for year in years:
        yr = str(year)
        # fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        simu.set_survey()
        inflator = get_loyer_inflator(year)
        simu.inflate_survey({'loyer': inflator})
        simu.compute()
        agg = Aggregates()
        agg.set_simulation(simu)
        agg.compute()
        if writer is None:
            writer = ExcelWriter(str(fname_all))
        agg.aggr_frame.to_excel(writer, yr, index=False, header=True,
                                float_format="%.2f")
        print agg.aggr_frame.to_string()
        del simu
        del agg
        import gc
        gc.collect()
    writer.save()
def main():
    parser = argparse.ArgumentParser(description='Fantasy Data Visualization')
    parser.add_argument('players', metavar='PLAYER',
                        type=int, nargs='*', help='ids of players to display')
    parser.add_argument('-d', '--display', type=int,
                        choices=[10, 25, 50], default=10,
                        help='number of rows to display')
    parser.add_argument('-e', '--excel', dest='excel',
                        action='store_true', default=False, help='to excel')
    args = parser.parse_args()
    show = int(args.display)  # number of stats to show
    stats = pd.DataFrame.from_csv('.cache/res_avg.csv')
    # write all stats to excel file
    if (args.excel):
        writer = ExcelWriter('.cache/res_avg.xlsx')
        stats.to_excel(writer, 'Sheet1')
        writer.save()
    # display plot
    if len(args.players) > 0:
        plot(stats=stats, players=args.players)
    # print short summary
    print stats.sort_values(by=['avg_2015'], ascending=[False]).head(show)
def diag_aggregates():
    years = ['2006', '2007', '2008', '2009']
    df_final = None
    for yr in years:
        xls = ExcelFile(fname_all)
        df = xls.parse(yr, hindex_col=True)
        cols = [u"Mesure",
                u"Dépense \n(millions d'€)",
                u"Bénéficiaires \n(milliers)",
                u"Dépenses \nréelles \n(millions d'€)",
                u"Bénéficiaires \nréels \n(milliers)",
                u"Diff. relative \nDépenses",
                u"Diff. relative \nBénéficiaires"]
        selected_cols = [u"Mesure",
                         u"Diff. relative \nDépenses",
                         u"Diff. relative \nBénéficiaires"]
        df = df[selected_cols]
        df['year'] = yr
        df['num'] = range(len(df.index))
        df = df.set_index(['num', u'Mesure', 'year'])
        if df_final is None:
            df_final = df
        else:
            df_final = df_final.append(df, ignore_index=False)
    # DataFrame.groupby()
    df_final = df_final.sortlevel(0)
    print str(fname_all)[:-5] + '_diag.xlsx'
    writer = ExcelWriter(str(fname_all)[:-5] + '_diag.xlsx')
    df_final.to_excel(writer, sheet_name="diagnostics", float_format="%.2f")
    writer.save()
def create_output(regression_dist_dict, closest_curve_dict, reactor_name, name_add):
    '''Converts the dictionaries into dataframes to format for saving as an Excel file.
    The total results are on the first sheet and the closest curves on the second.'''
    # creates a dataframe by looping through the dict and appending the df's together.
    count = 0
    print regression_dist_dict
    for key in regression_dist_dict:
        if count == 0:
            total_results = pd.DataFrame(regression_dist_dict[key],
                                         index=[key] * len(regression_dist_dict[key]),
                                         columns=['reactor', 'enrichment', 'distance'])
            closest_results = pd.DataFrame([closest_curve_dict[key]],
                                           index=[key],
                                           columns=['reactor', 'enrichment', 'distance'])
            count += 1
        else:
            total_results = total_results.append(
                pd.DataFrame(regression_dist_dict[key],
                             index=[key] * len(regression_dist_dict[key]),
                             columns=['reactor', 'enrichment', 'distance']))
            closest_results = closest_results.append(
                pd.DataFrame([closest_curve_dict[key]],
                             index=[key],
                             columns=['reactor', 'enrichment', 'distance']))
    print 'total_results', total_results
    print 'closest_results', closest_results
    file_name = 'data/%s_regression_results_%s.xlsx' % ('_'.join(map(str, reactor_name)), name_add)
    writer = ExcelWriter(file_name)
    total_results.to_excel(writer, sheet_name='Sheet1')
    closest_results.to_excel(writer, sheet_name='Sheet2')
    writer.save()
class Excel(object):

    goal_time = 0.2
    params = ['openpyxl', 'xlsxwriter', 'xlwt']
    param_names = ['engine']

    def setup(self, engine):
        N = 2000
        C = 5
        self.df = DataFrame(np.random.randn(N, C),
                            columns=['float{}'.format(i) for i in range(C)],
                            index=date_range('20000101', periods=N, freq='H'))
        self.df['object'] = tm.makeStringIndex(N)
        self.bio_read = BytesIO()
        self.writer_read = ExcelWriter(self.bio_read, engine=engine)
        self.df.to_excel(self.writer_read, sheet_name='Sheet1')
        self.writer_read.save()
        self.bio_read.seek(0)
        self.bio_write = BytesIO()
        self.bio_write.seek(0)
        self.writer_write = ExcelWriter(self.bio_write, engine=engine)

    def time_read_excel(self, engine):
        read_excel(self.bio_read)

    def time_write_excel(self, engine):
        self.df.to_excel(self.writer_write, sheet_name='Sheet1')
        self.writer_write.save()
def save_xls_name(list_dfs, xls_path, sheet_name):
    '''save function that takes a list as input to name sheets.'''
    # remove non-ascii characters from dataframes before saving
    for df in list_dfs:
        df.index = remove_non_ascii(df.index)
        for col in df.columns:
            df[col] = remove_non_ascii(df[col])
    # save the df's to an excel file
    writer = ExcelWriter(xls_path)
    for n, df in enumerate(list_dfs):
        df.to_excel(writer, sheet_name[n])
    writer.save()


def remove_non_ascii(col):
    '''remove non-ascii characters for saving to excel'''
    new_index = []
    for name in col:
        try:
            for letter in name:
                if ord(letter) > 128:
                    name = name.replace(letter, '')
        except:
            pass
        new_index.append(name)
    return new_index
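# A minimal usage sketch for save_xls_name (not part of the original source):
# the frames, output path and sheet names below are made up, and pandas is
# assumed to have a writer engine available for the chosen extension.
import pandas as pd

dfs = [pd.DataFrame({'name': ['café', 'plain']}),
       pd.DataFrame({'name': ['naïve']})]
save_xls_name(dfs, 'cleaned.xlsx', ['first', 'second'])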
def AddSeqComp(mypath):
    """ Loads TestLogAll.h5 from the specified path, then calls
    MeasurementGroupTools.AddSeqComp to recalculate seq components using FFT

    Input:  Directory of the measurement campaign, e.g.: "aLabView2"
    Output: Results1.h5, Results1.pdf in the data subdirs.
    """
    from pandas import HDFStore, ExcelWriter
    import MeasurementGroupTools as mgt

    h5logs = HDFStore(mypath + "\\" + 'TestLogsAll.h5')
    TestLog = h5logs['TestLogsAll']

    dirs = TestLog[u'DirName'].unique()
    for dname in dirs:
        mysubdirpath = mypath + "\\" + dname
        print "Processing: " + dname
        mgt.AddSeqComp(mysubdirpath, TestLog, dname)

    h5logs.put('TestLogsAll', TestLog)
    h5logs.close()

    writer = ExcelWriter(mypath + "\\" + 'TestLogsAll.xlsx')
    TestLog.to_excel(writer, 'TestLogsAll')  # the second argument defines sheet name
    writer.save()
    return
def build_and_send_email(self, data, options):
    date = timezone.now().date().strftime('%Y_%m_%d')
    if 'recipients' in options:
        print 'yes'
        recipients = options['recipients']
    else:
        print 'no'
        recipients = settings.DEFAULT_WEEKLY_RECIPIENTS
    print 'recipients:', recipients
    message = EmailMessage(subject='Kikar Hamedina, Weekly Report: %s' % date,
                           body='Kikar Hamedina, Weekly Report: %s.' % date,
                           to=recipients)
    w = ExcelWriter('Weekly_report_%s.xlsx' % date)
    for datum in data:
        # csvfile = StringIO.StringIO()
        pd.DataFrame.from_dict(datum['content']).to_excel(w, sheet_name=datum['name'])
    w.save()
    w.close()
    # f = open(w.path, 'r', encoding='utf-8')
    message.attach_file(w.path)
    message.send()
def saveDialog(self):
    '''Saves the project as an .xls file.'''
    title = 'Save project as...'
    fileName, f = QFileDialog.getSaveFileName(self, title, self.path)
    writer = ExcelWriter(fileName + '.xls')
    for marker in self.markers:
        marker.table.to_excel(writer, marker.name)
    writer.save()
def writeToExcel(fileName=''):
    print "Writing to Excel File : " + fileName
    data = {'CVE ID Number': cveIDNumber, 'Summary Text': summaryText,
            'Publish Date': publishDate, 'Software Type': softwareType,
            'Vendor': vendor, 'Product': product, 'Version': version,
            'CVSS Score': cvssScore, 'Confidentiality Impact': confidentialityImpact,
            'Integrity Impact': integrityImpact, 'Availibility Impact': availibilityImpact,
            'Access Complexity': accessComplexity, 'Authentication': authentication,
            'Gained Access': gainedAccess, 'Vulnerability Type': vulnType}
    df = pd.DataFrame(data, columns=['CVE ID Number', 'Publish Date', 'Software Type',
                                     'Vendor', 'Product', 'Version', 'CVSS Score',
                                     'Confidentiality Impact', 'Integrity Impact',
                                     'Availibility Impact', 'Access Complexity',
                                     'Authentication', 'Gained Access',
                                     'Vulnerability Type', 'Summary Text'])
    writer = ExcelWriter(fileName)
    df.to_excel(writer, 'CVE Details', index=False)
    writer.save()
    print "Completed."
def save_xlsx(list_dfs, xlsx_path):
    writer = ExcelWriter(xlsx_path)
    for n, df in enumerate(list_dfs):
        df.to_excel(writer, '%s' % n)
        print('Saving %s' % n)
    writer.save()
    print('Finished writing to file')
    return None
def to_mem_excel(dataframe, sheet_name='WorkSheet'):
    iobuffer = BytesIO()
    writer = ExcelWriter(iobuffer, engine='xlwt')
    dataframe.to_excel(writer, sheet_name=sheet_name)
    writer.save()
    iobuffer.flush()
    iobuffer.seek(0)
    return iobuffer.getvalue()
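# A minimal usage sketch for to_mem_excel (not part of the original source):
# it assumes a pandas build with xlwt support, since the writer above uses the
# 'xlwt' engine and therefore produces legacy .xls bytes. The output file name
# is made up; the returned bytes could equally be attached to an HTTP response
# or an e-mail instead of being written to disk.
import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
payload = to_mem_excel(df, sheet_name='Demo')
with open('demo.xls', 'wb') as fh:
    fh.write(payload)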
def corpus_to_excel(corpus_path, excel_path):
    '''NB! Make sure to use .xls file extension for Excel files.'''
    corpus = PyCorpus(corpus_path)
    writer = ExcelWriter(excel_path)
    for key in corpus:
        corpus[key].to_excel(writer, sheet_name=key)
    writer.save()
    corpus.close()
def extract_SHT1x_data_day_by_day(SHT1x_dataframe, days_list):
    # the 'with' statement doesn't work here
    today = date.today()
    writer = ExcelWriter('static/data/SHT1x.xlsx')
    for day in days_list:
        if day <= today:
            day_SHT1x = SHT1x_dataframe[str(day)]
            day_SHT1x.to_excel(writer, sheet_name=str(day))
    writer.save()
def save_peaks_excel(peakOnlyHdf5, xlsxFile):
    dsets = h5py.File(peakOnlyHdf5, 'r')
    writer = ExcelWriter(xlsxFile)
    for _key in dsets.keys():
        dset = dsets[_key]
        _df = pd.DataFrame(list(dset))
        _df.to_excel(writer, _key, header=False, index=False)
        print(_key + ' sheet is created')
    writer.save()
    writer.close()
def slmode(sheet, size):
    writer = ExcelWriter("sw_mode_" + str(size) + "t_" + sheet + ".xlsx")
    columnas = dfs[str(sheet)].columns  # store columns names
    length = len(dfs[str(sheet)].columns)
    new_df = pd.DataFrame(dfs[str(sheet)].iloc[:, 0])
    for i in range(1, length - (size - 1)):
        for j in range(0, (size)):
            new_df[str(columnas[j + i])] = dfs[str(sheet)].iloc[:, j + i]
        new_df.to_excel(writer, "set_" + str(i), index=False)
        new_df = pd.DataFrame(dfs[str(sheet)].iloc[:, 0])
    writer.save()
def export_to_xls(df, path, format_excel=None, engine='xlsxwriter', send=False):
    writer = ExcelWriter(path, engine=engine,
                         datetime_format='hh:mm:ss mmm d yyyy',
                         date_format='mmmm dd yyyy')
    df.to_excel(writer)
    writer.save()
    if format_excel:
        format_excel(path)
    if send:
        send_file_by_email(path)
    else:
        return download_file(path)
def extract_thermo_data_day_by_day(thermo_dataframe, days_list):
    # the 'with' statement doesn't work here
    # replace doesn't work properly
    #thermo_dataframe_sustituted = thermo_dataframe.replace({'0': 'OFF', '1': 'ON'})
    #print thermo_dataframe_sustituted
    today = date.today()
    writer = ExcelWriter('static/data/thermo.xlsx')
    for day in days_list:
        if day <= today:
            day_thermo = thermo_dataframe[str(day)]
            day_thermo.to_excel(writer, sheet_name=str(day))
    writer.save()
def save_xls(self, dframe):
    # Write the data to a sheet (named after self.name) in the Excel file named after the industry
    xls_path = os.path.join(current_folder, '筛选后股票的财务报表', self.hangye)
    if os.path.exists(xls_path):
        # the Excel file already exists
        book = load_workbook(xls_path)
        writer = pd.ExcelWriter(xls_path, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        dframe.to_excel(writer, self.name)
        writer.save()
    else:
        # the file does not exist yet
        writer = ExcelWriter(xls_path)
        dframe.to_excel(writer, self.name)
        writer.save()
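# Standalone sketch of the "append to an existing workbook" pattern used in the
# first branch above (not part of the original source). It assumes openpyxl and
# an older pandas where ExcelWriter.book / ExcelWriter.sheets are writable;
# recent pandas releases offer ExcelWriter(path, engine='openpyxl', mode='a')
# instead. 'report.xlsx' and the sheet name are hypothetical.
import pandas as pd
from openpyxl import load_workbook

book = load_workbook('report.xlsx')
writer = pd.ExcelWriter('report.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
pd.DataFrame({'value': [1, 2, 3]}).to_excel(writer, 'new_sheet')
writer.save()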
def GetPrices():
    """ Goes to the URL, Reads the CSV download link, and creates the CSV DataFrame"""
    url = "http://fundresearch.fidelity.com/mutual-funds/fidelity-funds-daily-pricing-yields/download"
    CSV_Import = urllib.request.urlopen(url).read()
    CSV = pd.read_csv(url, skiprows=3)

    """ Creates CSV File to be opened in Excel.
    This can be removed if you don't need Excel and you can just use CSV as the DataFrame """
    File = 'DailyPrices'
    writer = ExcelWriter(str(File) + '.xlsx')
    CSV.to_excel(writer, 'DailyReport', index=False)
    writer.close()
    os.startfile(File + '.xlsx')
def to_excel():
    DR = data_recording.DataRecorder(db_name="PRIVATE/result.sqlite")
    sql = "Select * from rep"
    DR.con.row_factory = sqlite3.Row
    cursor = DR.con.execute(sql)
    rows = cursor.fetchall()
    DF = pd.DataFrame(rows, columns=[item[0] for item in cursor.description])
    # note: the output path must already exist.
    writer = ExcelWriter(conf_file.EXPORT_REP + '/' + 'fact_excel.xlsx')
    DF.to_excel(writer, sheet_name='data_fact')
    writer.save()
    print("Le fichier a été sauvé dans {}".format(conf_file.EXPORT_REP + '/' + 'fact_excel.xlsx'))
def to_excel(self, filename='myfile.xlsx'):
    """Export information to an Excel file

    Kargs:
        filename: string
            Name of the excel file ex: filename='myfile.xlsx'
    """
    writer = ExcelWriter(filename)
    self.clfinfo.to_excel(writer, 'Classifier')
    self.statinfo.to_excel(writer, 'Statistics')
    try:
        self.featinfo.to_excel(writer, 'Features')
    except:
        warn('Information about features has been ignored. Run fit()')
    writer.save()
def save_data(Working_Directory, Result_Directory, name_file, Duration_ON,
              Duration_OFF, Num_pixels_ON, Num_pixels_OFF):
    ## Excel data
    # Save duration
    Duration = list()
    Stimulus_Type = list()
    Matched_Pixels = list()
    Stimulus_Index = list()
    count = 0
    for ii in xrange(size(Duration_ON, 0)):
        Duration.append(mean(Duration_ON[ii, :]))
        Matched_Pixels.append(Num_pixels_ON[ii, :])
        Stimulus_Type.append(str(count + 1) + 'ON')
        Stimulus_Index.append(count)
        count = count + 1
    for ii in xrange(size(Duration_OFF, 0)):
        Duration.append(mean(Duration_OFF[ii, :]))
        Matched_Pixels.append(Num_pixels_OFF[ii, :])
        Stimulus_Type.append(str(count + 1) + 'OFF')
        Stimulus_Index.append(count)
        count = count + 1

    ## For fish 23, change OFF to ON and save
    # Stimulus_Type[2] = '3ON'

    # Save matched_pixels
    Name_stimulus = get_list_of_stimulus_name(Working_Directory)
    Label_plane, Label_stimulus = label_stimulus(Name_stimulus, Stimulus_Type)
    Stim_type_all = repeat(Stimulus_Type, size(Matched_Pixels, 1))
    Matched_Pixels_all = reshape(Matched_Pixels, (size(Matched_Pixels)))
    Name_stimulus_all = tile(Name_stimulus, size(Matched_Pixels, 0))

    # Some data frames
    df1 = DataFrame({'Stimulus_Type': Stimulus_Type, 'TDuration': Duration})  # Only duration
    df2 = DataFrame(index=Stimulus_Index, columns=Name_stimulus)  # pixels to concatenate with duration
    df3 = DataFrame(index=Stimulus_Type, columns=Name_stimulus)  # pixels standalone
    df4 = DataFrame({'Stimulus_Type': Stim_type_all, 'Pixels': Matched_Pixels_all,
                     'Label_plane': Label_plane, 'Label_stimulus': Label_stimulus,
                     'Original_Stim': Name_stimulus_all})  # label pixels with stimulus and z plane
    df4["Stimulus"] = df4.Label_stimulus.map(Label_Odor_reverse)

    for ii in xrange(0, size(Stimulus_Index)):
        df2.ix[ii] = Matched_Pixels[ii]
        df3.ix[ii] = Matched_Pixels[ii]

    df = concat([df1, df2], join='inner', axis=1)

    # Save to excel
    writer = ExcelWriter(Result_Directory + filesep + 'Classified_Results' + filesep + name_file + '.xlsx',
                         engine='xlsxwriter')
    df.to_excel(writer, sheet_name='sheet1')
    writer.close()
    return df, df1, df3, df4
def networkset_2_spreadsheet(ntwkset, file_name=None, file_type='excel',
                             *args, **kwargs):
    '''
    Write a NetworkSet object to a spreadsheet, for your boss

    Write the s-parameters of each network in the networkset to a
    spreadsheet. If the `excel` file_type is used, then each network
    is written to its own sheet, with the sheetname taken from the
    network `name` attribute. This function makes use of the pandas
    module, which in turn makes use of the xlrd module. These are
    imported during this function.

    Notes
    ------
    The frequency unit used in the spreadsheet is taken from
    `ntwk.frequency.unit`

    Parameters
    -----------
    ntwkset : :class:`~skrf.networkSet.NetworkSet` object
        the network to write
    file_name : str, None
        the file_name to write. if None, ntwk.name is used.
    file_type : ['csv','excel','html']
        the type of file to write. See pandas.DataFrame.to_??? functions.
    form : 'db','ma','ri'
        format to write data,
        * db = db, deg
        * ma = mag, deg
        * ri = real, imag
    \*args, \*\*kwargs :
        passed to pandas.DataFrame.to_??? functions.

    See Also
    ---------
    networkset_2_spreadsheet : writes a spreadsheet for many networks
    '''
    from pandas import DataFrame, Series, ExcelWriter  # delayed because its not a requirement
    if ntwkset.name is None and file_name is None:
        raise(ValueError('Either ntwkset must have name or give a file_name'))

    if file_type == 'excel':
        writer = ExcelWriter(file_name)
        [network_2_spreadsheet(k, writer, sheet_name=k.name, *args, **kwargs)
         for k in ntwkset]
        writer.save()
    else:
        [network_2_spreadsheet(k, *args, **kwargs) for k in ntwkset]
def setup(self, engine):
    N = 2000
    C = 5
    self.df = DataFrame(np.random.randn(N, C),
                        columns=['float{}'.format(i) for i in range(C)],
                        index=date_range('20000101', periods=N, freq='H'))
    self.df['object'] = tm.makeStringIndex(N)
    self.bio_read = BytesIO()
    self.writer_read = ExcelWriter(self.bio_read, engine=engine)
    self.df.to_excel(self.writer_read, sheet_name='Sheet1')
    self.writer_read.save()
    self.bio_read.seek(0)
    self.bio_write = BytesIO()
    self.bio_write.seek(0)
    self.writer_write = ExcelWriter(self.bio_write, engine=engine)
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
from netCDF4 import Dataset

ifile = 'merra2_omega_kenya_400_timeseries.nc'
f = Dataset(ifile, mode='r')
field = f.variables['wap'][:, :]
#field=field*86400
mname = 'omega'
print field
#print str(field).replace('[','').replace(']',' ')
#field=str(field).replace('[',' ').replace(']]',',')
field = (" ".join(str(i) for i in field))
field = str(field).replace('[', ' ').replace(']]]', ',')
df = pd.DataFrame({mname: [field]})
writer = ExcelWriter('kenya_omega_monthly_means.xlsx')
df.to_excel(writer, 'Sheet1', index=True)
writer.save()
# CSV File
df = pd.DataFrame.from_dict(data, orient='index', dtype=None)
#df = pd.read_csv("Data.csv")
s = df.describe()
print s

# CSV File
filename = 'Data.csv'
df.to_csv(filename, index=True, encoding='utf-8')
#print df

# XLS File
filename1 = 'Data.xlsx'
writer = ExcelWriter(filename1)
df.to_excel(writer, 'Sheet1')
writer.save()

# JSON File
filename2 = 'Data.json'
df.to_json(filename2, orient="index")
s = df.describe()
print s

# # files = [f for f in os.listdir('./pokemon_5378')]
# for f in files:
#     print f
import pandas as objPandas
from pandas import ExcelWriter

archivo = objPandas.DataFrame({
    'matricula': [12345, 1235],
    'Nombre': ['Joseph', 'pedro'],
    'Apellido': ['Mendez', 'lopez']
})
archivo = archivo[['matricula', 'Nombre', 'Apellido']]

rut = ExcelWriter(r'C:\Users\Joseph\Desktop\excell\utm.xlsx')
archivo.to_excel(rut, 'Hoja de datos', index=False)
rut.save()
}
response = requests.get(url, headers=headers)
html_doc = response.content
#print(html_doc)
soup = BeautifulSoup(html_doc, 'html.parser')
# get all content at tag a
pids = soup.findAll('a')
result = []
for pid in pids:
    p = pid.attrs
    if ('target' in p and p['target'] == '_blank' and 'data-click' in p):
        if ('百度快照' not in pid.contents[0]):
            slist = []
            for x in pid.contents:
                slist.append(str(x))
            s = ''.join(slist)
            s = s.replace('\n', '').replace(' ', '').replace('<em>', '').replace('</em>', '')
            result.append(s)

# save to excel
d = {'titles': result[:100]}
df = pd.DataFrame(data=d)
from pandas import ExcelWriter
writer = ExcelWriter('hot_news.xlsx')
df.to_excel(writer, 'Sheet1')
writer.save()
        # Condition 6: Current Price is at least 30% above 52 week low
        condition_6 = currentClose >= (1.3 * low_of_52week)

        # Condition 7: Current Price is within 25% of 52 week high
        condition_7 = currentClose >= (.75 * high_of_52week)

        # If all conditions above are true, add stock to exportList
        if (condition_1 and condition_2 and condition_3 and condition_4
                and condition_5 and condition_6 and condition_7):
            exportList = exportList.append(
                {
                    'Stock': stock,
                    "RS_Rating": RS_Rating,
                    "50 Day MA": moving_average_50,
                    "150 Day Ma": moving_average_150,
                    "200 Day MA": moving_average_200,
                    "52 Week Low": low_of_52week,
                    "52 week High": high_of_52week
                },
                ignore_index=True)
            print(stock + " made the Minervini requirements")
    except Exception as e:
        print(e)
        print(f"Could not gather data on {stock}")

exportList = exportList.sort_values(by='RS_Rating', ascending=False)
print('\n', exportList)

writer = ExcelWriter("ScreenOutput.xlsx")
exportList.to_excel(writer, "Sheet1")
writer.save()
def save_xls(df_list, xls_path):
    with ExcelWriter(xls_path) as writer:
        for n, df in enumerate(df_list):
            df.to_excel(writer, 'sheet%s' % n)
        writer.save()
    print("Saved")
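# A minimal usage sketch for save_xls (not part of the original source): the
# frames and output path below are made up; each DataFrame lands on its own
# 'sheet<n>' tab of the workbook.
import pandas as pd

frames = [pd.DataFrame({'a': [1, 2]}), pd.DataFrame({'b': [3, 4]})]
save_xls(frames, 'frames.xlsx')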
def extract_expression(tumor, platform, gencode_version): """ The EXTRACT_EXPRESSION operation extracts expression values from TCGA for all the genes of interest and their candidate regulatory genes. Intermediate results files are exported locally during the execution of the function, while the final dataframes are returned as Pandas dataframes and exported locally in the Excel files 'Gene Expression - InterestGenes.xlsx' and 'Gene Expression - RegulatoryGenes.xlsx'. :param tumor: full name of the tumor of interest, encoded as a string (e.g. 'Ovarian Serous Cystadenocarcinoma', 'Breast Invasive Carcinoma', ...) :param platform: number identifying the sequencing platform (either 27 for the 27k probes sequencing platform or 450 for the 450k probes sequencing platform) :param gencode_version: number representing the GENCODE genomic annotations to use (currently, for assembly GRCh38, versions 22, 24 and 27 can be used) :return: two Pandas dataframes Example:: import genereg as gr expr_interest_df, expr_regul_df = gr.GeneExpression.extract_expression(tumor='Ovarian Serous Cystadenocarcinoma', platform=27, gencode_version=22) """ # Check input parameters tcga_tumors = [ "Acute Myeloid Leukemia", "Adrenocortical Carcinoma", "Bladder Urothelial Carcinoma", "Brain Lower Grade Glioma", "Breast Invasive Carcinoma", "Cervical Squamous Cell Carcinoma and Endocervical Adenocarcinoma", "Cholangiocarcinoma", "Colon Adenocarcinoma", "Esophageal Carcinoma", "Glioblastoma Multiforme", "Head and Neck Squamous Cell Carcinoma", "Kidney Chromophobe", "Kidney Renal Clear Cell Carcinoma", "Kidney Renal Papillary Cell Carcinoma", "Liver Hepatocellular Carcinoma", "Lung Adenocarcinoma", "Lung Squamous Cell Carcinoma", "Lymphoid Neoplasm Diffuse Large B-cell Lymphoma", "Mesothelioma", "Ovarian Serous Cystadenocarcinoma", "Pancreatic Adenocarcinoma", "Pheochromocytoma and Paraganglioma", "Prostate Adenocarcinoma", "Rectum Adenocarcinoma", "Sarcoma", "Skin Cutaneous Melanoma", "Stomach Adenocarcinoma", "Testicular Germ Cell Tumors", "Thymoma", "Thyroid Carcinoma", "Uterine Carcinosarcoma", "Uterine Corpus Endometrial Carcinoma", "Uveal Melanoma" ] if tumor not in tcga_tumors: raise ValueError( 'PATHOLOGY NOT SUPPORTED! You can analyze one of these 33 types of TCGA tumors: ' + (', '.join(tcga_tumors))) if platform not in [27, 450]: raise ValueError( 'PLATFORM NOT RECOGNIZED! 
Sequencing platforms available: 27 and 450' ) if gencode_version not in [22, 24, 27]: raise ValueError('GRCh38 GENCODE versions available are 22, 24 and 27') # Load the list of genes of interest EntrezConversion_df = pd.read_excel('./Genes_of_Interest.xlsx', sheetname='Sheet1', header=0, converters={ 'GENE_SYMBOL': str, 'ENTREZ_GENE_ID': str, 'GENE_SET': str }) # Create a list containing the Gene Symbols of the genes of interest genesSYM_of_interest = [] for i, r in EntrezConversion_df.iterrows(): sym = r['GENE_SYMBOL'] if sym not in genesSYM_of_interest: genesSYM_of_interest.append(sym) # Import the dictionary of genes of interest with their candidate regulatory genes dict_RegulGenes = pickle.load( open('./2_Regulatory_Genes/dict_RegulGenes.p', 'rb')) # Import the gene-TFs mapping dataframe Mapping_df = pd.read_excel('./0_Genes_Mapping/Genes_Mapping.xlsx', sheetname='Sheet1', header=0, converters={ 'ENTREZ_GENE_ID': str, 'HGNC_ID': str }) # Create a list containing the Gene Symbols of the regulatory genes of genes of interest regulatory_genesSYM = [] for key, value in dict_RegulGenes.items(): for gene in value: if gene not in regulatory_genesSYM: regulatory_genesSYM.append(gene) # Extract the list of distinct Gene Symbols mapped in the mapping table mapped_gene_SYMs = [] for index, row in Mapping_df.iterrows(): sym = row['GENE_SYMBOL'] if sym not in mapped_gene_SYMs: mapped_gene_SYMs.append(sym) # Execute the query for the extraction of gene expression values on the remote server, using the PyGMQL Python library gl.set_remote_address('http://gmql.eu/gmql-rest/') gl.login() gl.set_mode('remote') # Load the TCGA datasets to be used in the query methylation_dataset = gl.load_from_remote( remote_name='GRCh38_TCGA_methylation', owner='public') expression_dataset = gl.load_from_remote( remote_name='GRCh38_TCGA_gene_expression', owner='public') # Identify the sequencing platform to be used if platform == 27: seq_platform = 'Illumina Human Methylation 27' elif platform == 450: seq_platform = 'Illumina Human Methylation 450' # Extract all the samples for the current tumor and platform all_methyl = methylation_dataset.meta_select( (methylation_dataset['manually_curated__cases__disease_type'] == tumor) & (methylation_dataset['manually_curated__platform'] == seq_platform) & ((methylation_dataset['biospecimen__bio__sample_type'] == 'Primary Tumor') | (methylation_dataset['biospecimen__bio__sample_type'] == 'Recurrent Tumor')) & (methylation_dataset[ 'clinical__shared__history_of_neoadjuvant_treatment'] == 'No')) all_expr = expression_dataset.meta_select( (expression_dataset['manually_curated__cases__disease_type'] == tumor) & ((expression_dataset['biospecimen__bio__sample_type'] == 'Primary Tumor') | (expression_dataset['biospecimen__bio__sample_type'] == 'Recurrent Tumor')) & (expression_dataset[ 'clinical__shared__history_of_neoadjuvant_treatment'] == 'No')) # Gene Expression: expr_0 = all_expr.reg_project(field_list=[ 'ensembl_gene_id', 'entrez_gene_id', 'gene_symbol', 'fpkm' ]) expr = expr_0.meta_select( semiJoinDataset=all_methyl, semiJoinMeta=['biospecimen__bio__bcr_sample_barcode']) # Materialize the results into a GDataframe expr_Gdf = expr.materialize('./(MaterializeResults)') # The result dataset is loaded as a GDataframe, an object containing two pandas dataframes, one for the region data and one for the metadata. 
# Get the two pandas dataframes: expr_df_regs = expr_Gdf.regs expr_df_meta = expr_Gdf.meta n_regs = len(expr_df_regs) n_samples = len(expr_df_meta) # Rename 'chr', 'start', and 'stop' columns header expr_df_regs.rename(columns={ 'chr': 'chrom', 'start': 'left', 'stop': 'right' }, inplace=True) # Change index into progressive integer numbers and store the name of the sample in another column expr_df_regs['sample_id'] = expr_df_regs.index expr_df_regs.index = range(n_regs) # Convert unknown values (NaN) to empty strings expr_df_regs = expr_df_regs.fillna('') # Convert all the metadata values into strings, since they're encode as lists in Python col_names = [] for name, values in expr_df_meta.iteritems(): col_names.append(name) for index, row in expr_df_meta.iterrows(): for c in col_names: list_val = row[c] # it's encoded as a list str_val = ''.join( list_val) # convert the value stored as a list in a string expr_df_meta.set_value(index, c, str_val) # Since we have to extract the expression values for each distinct sample barcode (aliquot), we create a list containing these distinct identifiers expr_sample_barcodes_all = [] for index, row in expr_df_meta.iterrows(): barcode = row['biospecimen__bio__bcr_sample_barcode'] if barcode not in expr_sample_barcodes_all: # get distinct values expr_sample_barcodes_all.append(barcode) # Check which are repeated aliquots, if present all_aliqouts = [] for index, row in expr_df_meta.iterrows(): barcode = row['biospecimen__bio__bcr_sample_barcode'] all_aliqouts.append(barcode) multiple_aliquots = [ item for item, count in collections.Counter(all_aliqouts).items() if count > 1 ] samples_to_remove = [] expr_sample_barcodes = [] if len(multiple_aliquots) != 0: # Among the repeated aliquots, keep only the most recent ones (of 2013) for index, row in expr_df_meta.iterrows(): year = row['biospecimen__bio__year_of_shipment'] barcode = row['biospecimen__bio__bcr_sample_barcode'] if (barcode in multiple_aliquots) and year == '2011': expr_df_meta.drop(index, inplace=True) samples_to_remove.append(index) # Import the list of aliquots in the methylation dataset text_file = open('./3_TCGA_Data/Common_Aliquots.txt', 'r') aliquots = text_file.read().split('\n') aliquots.remove('') text_file.close() # Extract the new list of distinct TCGA Aliquots to extract for index, row in expr_df_meta.iterrows(): barcode = row['biospecimen__bio__bcr_sample_barcode'] if barcode in aliquots: if barcode not in expr_sample_barcodes: expr_sample_barcodes.append(barcode) else: expr_df_meta.drop(index, inplace=True) samples_to_remove.append(index) # Remove regions that corresponded to eliminated repeated aliquots expr_df_regs = expr_df_regs.loc[~( expr_df_regs['sample_id'].isin(samples_to_remove))].copy() else: expr_sample_barcodes = expr_sample_barcodes_all # Export the metadata dataframe setting the TCGA aliquots as indexes. 
Metadata_df = expr_df_meta.copy() Metadata_df['id_sample'] = Metadata_df.index Metadata_df.set_index('biospecimen__bio__bcr_sample_barcode', inplace=True) writer = ExcelWriter('./3_TCGA_Data/Gene_Expression/EXPR_(Metadata).xlsx') Metadata_df.to_excel(writer, 'Sheet1') writer.save() # Extract from the expression dataset all the regions that belong to genes of interest expr_df_regs_interest = expr_df_regs.loc[expr_df_regs['gene_symbol'].isin( genesSYM_of_interest)].copy() # Extract from the expression dataset all the regions that belong to regulatory genes of genes of interest expr_df_regs_regulatory = expr_df_regs.loc[ expr_df_regs['gene_symbol'].isin(regulatory_genesSYM)].copy() # Gene expression values for each gene of interest: # Create a dictionary for storing all the gene expression values for each gene of interest and for each aliquot TCGA from collections import defaultdict dict_expr_interest = defaultdict(dict) for key, value in dict_expr_interest.items(): value = defaultdict(list) # The main dictionary has the Gene Symbols of the genes of interest as keys and each gene has another dictionary as value, which, in turn, has the different aliquots as keys and lists as values. # The idea is having a list, containing all the fpkm values, for each gene in each TCGA aliquot. # Set the Gene Symbol as keys of the main dictionary for name in genesSYM_of_interest: dict_expr_interest[name] = {} # Set the names of the samples barcodes as keys for each dictionary set as value of a specific key (genes) for sample in expr_sample_barcodes: for k, v in dict_expr_interest.items(): v[sample] = [] # Set the values by appending the expression values for each gene of interest: these expression values (fpkm) can be found in the 'expr_df_regs_interest' dataframe for index, row in expr_df_regs_interest.iterrows( ): # iterating along the whole dataframe sym = row['gene_symbol'] # get the Gene Symbol of the gene fpkm = row['fpkm'] # get the gene expression value sample = row['sample_id'] # get the name of the sample # get the aliquot corresponding to current sample aliq = expr_df_meta.get_value(sample, 'biospecimen__bio__bcr_sample_barcode') # add the value according to the correct gene ID and TCGA aliquot, rounding it to a float with maximum 6 decimal numbers, dict_expr_interest[sym][aliq].append(round(float(fpkm), 6)) # Convert the nested dictionary also into a dataframe # Create a dataframe whose row indexes are the different TCGA samples and the columns are the distinct genes of interest expr_interest_df1 = pd.DataFrame(index=expr_sample_barcodes, columns=[genesSYM_of_interest]) # Add three additional columns for the name of the sample and the ID and barcode of the patient corresponding to each aliquot, in order to have them available if we will need it expr_interest_df2 = pd.DataFrame( index=expr_sample_barcodes, columns=['Sample_ID', 'Tumor', 'Patient_ID']) # Create the final dataframe expr_interest_df = expr_interest_df1.join(expr_interest_df2) # Fill the previously created dataframe with the correct gene expression values, for each gene of interest and for each TCGA aliquot for gene_sym, dict_value in dict_expr_interest.items(): for tcga_aliq, exp_list in dict_value.items(): if (len(exp_list) != 0): fpkm = exp_list[0] # add the expression value in the proper cell of the dataframe, rounding it to a float with maximum 6 decimal numbers expr_interest_df.set_value(tcga_aliq, gene_sym, round(fpkm, 6)) # Add to the dataframe the name of each sample, the tumor code and the patient's ID in correspondence of 
each TCGA aliquot for index, row in expr_df_meta.iterrows(): aliquot = row['biospecimen__bio__bcr_sample_barcode'] tumor_tag = row['clinical__admin__disease_code'] patient_id = row['clinical__shared__patient_id'] expr_interest_df.set_value(aliquot, 'Sample_ID', index) expr_interest_df.set_value(aliquot, 'Tumor', tumor_tag) expr_interest_df.set_value(aliquot, 'Patient_ID', patient_id) # Add a row at the beginning of the dataframe to insert also the Entrez Gene ID of each gene of interest additional_index = ['ENTREZ_GENE_ID'] expr_interest_df0_1 = pd.DataFrame(index=additional_index, columns=[genesSYM_of_interest]) expr_interest_df0_2 = pd.DataFrame( index=additional_index, columns=['Sample_ID', 'Tumor', 'Patient_ID']) expr_interest_df0 = expr_interest_df0_1.join(expr_interest_df0_2) frames = [expr_interest_df0, expr_interest_df] expr_interest_df = pd.concat(frames) # Add for each Gene Symbol of our genes of interest the corresponding Entrez Gene ID in the first row of the dataframe for i, r in EntrezConversion_df.iterrows(): entrez_id = r['ENTREZ_GENE_ID'] gene_name = r['GENE_SYMBOL'] expr_interest_df.set_value('ENTREZ_GENE_ID', gene_name, entrez_id) # Set empty strings for NaN values in the 'GENE_SYMBOL' row expr_interest_df.set_value('ENTREZ_GENE_ID', 'Sample_ID', "") expr_interest_df.set_value('ENTREZ_GENE_ID', 'Tumor', "") expr_interest_df.set_value('ENTREZ_GENE_ID', 'Patient_ID', "") # Export the dataframe with the gene expression values for our genes of interest for each TCGA aliquot writer = ExcelWriter( './3_TCGA_Data/Gene_Expression/Gene_Expression-InterestGenes.xlsx') expr_interest_df.to_excel(writer, 'Sheet1') writer.save() # Gene expression values for each candidate regulatory gene of the genes of interest: # Create a dictionary for storing all the gene expression values for each gene of interest and for each aliquot TCGA from collections import defaultdict dict_expr_regulatory = defaultdict(dict) for key, value in dict_expr_regulatory.items(): value = defaultdict(list) # The main dictionary has the Gene Symbols of the candidate regulatory genes as keys and each gene has another dictionary as value, which, in turn, has the different aliquots as keys and lists as values. # The idea is having a list, containing all the fpkm values, for each gene in each TCGA aliquot. 
# Set the Gene Symbols as keys of the main dictionary for name in regulatory_genesSYM: dict_expr_regulatory[name] = {} # Set the names of the samples barcodes as keys for each dictionary set as value of a specific key (genes) for sample in expr_sample_barcodes: for k, v in dict_expr_regulatory.items(): v[sample] = [] # Set the values by appending the expression values for each candidate regulatory gene: these expression values (fpkm) can be found in the "expr_df_regs_regulatory" dataframe for index, row in expr_df_regs_regulatory.iterrows( ): # iterating along the whole dataframe sym = row['gene_symbol'] # get the Gene Symbol of the gene ens_id = row['ensembl_gene_id'] # get the Ensembl Gene ID fpkm = row['fpkm'] # get the gene expression value sample = row['sample_id'] # get the name of the sample # get the aliquot corresponding to current sample aliq = expr_df_meta.get_value(sample, 'biospecimen__bio__bcr_sample_barcode') # add the value according to the correct gene ID and TCGA aliquot, rounding it to a float with maximum 6 decimal numbers if (gencode_version == 22): if (ens_id not in [ 'ENSG00000277726.3', 'ENSG00000275895.3', 'ENSGR0000214717.8' ]): dict_expr_regulatory[sym][aliq].append(round(float(fpkm), 6)) else: dict_expr_regulatory[sym][aliq].append(round(float(fpkm), 6)) # Convert the nested dictionary also into a dataframe # Create a dataframe whose row indexes are the different TCGA samples and the columns are the distinct candidate regulatory genes expr_regulatory_df1 = pd.DataFrame(index=expr_sample_barcodes, columns=[regulatory_genesSYM]) # Add three additional columns for the name of the sample and the ID and barcode of the patient corresponding to each aliquot, in order to have them available if we will need it expr_regulatory_df2 = pd.DataFrame( index=expr_sample_barcodes, columns=['Sample_ID', 'Tumor', 'Patient_ID']) # Create the final dataframe expr_regulatory_df = expr_regulatory_df1.join(expr_regulatory_df2) # Fill the previously created dataframe with the correct gene expression values, for each candidate regulatory gene and for each TCGA aliquot for gene_sym, dict_value in dict_expr_regulatory.items(): for tcga_aliq, exp_list in dict_value.items(): if (len(exp_list) != 0): fpkm = exp_list[0] # add the expression value in the proper cell of the dataframe, rounding it to a float with maximum 6 decimal numbers expr_regulatory_df.set_value(tcga_aliq, gene_sym, round(fpkm, 6)) # Add to the dataframe the name of each sample, the tumor code and the patient's ID in correspondence of each TCGA aliquot for index, row in expr_df_meta.iterrows(): aliquot = row['biospecimen__bio__bcr_sample_barcode'] tumor_tag = row['clinical__admin__disease_code'] patient_id = row['clinical__shared__patient_id'] expr_regulatory_df.set_value(aliquot, 'Sample_ID', index) expr_regulatory_df.set_value(aliquot, 'Tumor', tumor_tag) expr_regulatory_df.set_value(aliquot, 'Patient_ID', patient_id) # Add a row at the beginning of the dataframe to insert also the Gene Symbols of each gene of interest additional_index = ['ENTREZ_GENE_ID'] expr_regulatory_df0_1 = pd.DataFrame(index=additional_index, columns=[regulatory_genesSYM]) expr_regulatory_df0_2 = pd.DataFrame( index=additional_index, columns=['Sample_ID', 'Tumor', 'Patient_ID']) expr_regulatory_df0 = expr_regulatory_df0_1.join(expr_regulatory_df0_2) frames = [expr_regulatory_df0, expr_regulatory_df] expr_regulatory_df = pd.concat(frames) # Add for each Gene Symbol of the regulatory genes the corresponding Entrez Gene ID in the first row of the 
dataframe for i in regulatory_genesSYM: if i == 'PTRF': entrez_id = Mapping_df.loc[Mapping_df['GENE_SYMBOL'] == 'CAVIN1', 'ENTREZ_GENE_ID'].iloc[0] else: entrez_id = Mapping_df.loc[Mapping_df['GENE_SYMBOL'] == i, 'ENTREZ_GENE_ID'].iloc[0] expr_regulatory_df.set_value('ENTREZ_GENE_ID', i, entrez_id) # Set empty strings for NaN values in the 'GENE_SYMBOL' row expr_regulatory_df.set_value('ENTREZ_GENE_ID', 'Sample_ID', "") expr_regulatory_df.set_value('ENTREZ_GENE_ID', 'Tumor', "") expr_regulatory_df.set_value('ENTREZ_GENE_ID', 'Patient_ID', "") # Export the dataframe with the gene expression values for the regulatory genes of our genes of interest for each TCGA aliquot writer = ExcelWriter( './3_TCGA_Data/Gene_Expression/Gene_Expression-RegulatoryGenes.xlsx') expr_regulatory_df.to_excel(writer, 'Sheet1') writer.save() return expr_interest_df, expr_regulatory_df
def single_acct_analysis(self, accid, label=None, save=False, savepath=''):
    '''
    Analyze a single account.
    parameters:
        accid: account id number, which is converted into a regular expression
            by this method, although passing a regular expression is also supported.
        label: defaults to self.accid_col if None is passed to this parameter.
    '''
    accid_item = self.trans_accid_regex(accid)
    acct_data = self.getAcct(accid_item, accid_label=label, over_write=False,
                             pure=False, side='all')
    # acct_data=acct_data.set_index('glid',inplace=True)
    acct_sum = acct_data[self.drcrdesc].sum(axis=0)
    print('---start analysize %s---' % str(accid))
    print(get_time_str())
    print('---Account Data---')
    print('account data shape:', acct_data.shape)
    print('account sum:', acct_sum)
    print('theAcct %s:\n' % str(accid), acct_data)
    if acct_sum[0] != 0:
        dr_acct_data = self.getAcct(accid_item, accid_label=label,
                                    over_write=False, pure=False, side='dr')
        # dr_acct_data=dr_acct_data.set_index('glid',inplace=True)
        print('---Debit Data---')
        print('debit data shape:', dr_acct_data.shape)
        print('debit_side %s:\n' % str(accid), dr_acct_data)
        # print(dr_acct_data)
    else:
        dr_acct_data = None
        pass
    if acct_sum[1] != 0:
        cr_acct_data = self.getAcct(accid_item, accid_label=label,
                                    over_write=False, pure=False, side='cr')
        # cr_acct_data=cr_acct_data.set_index('glid',inplace=True)
        print('---Credit Data---')
        print('credit data shape:', cr_acct_data.shape)
        print('credit_side %s:\n' % str(accid), cr_acct_data)
        # print(cr_acct_data)
    else:
        cr_acct_data = None
        pass
    if save == True:
        import os
        # from autk import get_time_str
        from openpyxl import Workbook, load_workbook
        from pandas import ExcelWriter
        if savepath == '':
            savename = ''.join(['theAcct', str(accid), '-', get_time_str(), '.xlsx'])
            savepath = os.path.join(os.path.abspath(os.curdir), savename)
            wb = Workbook()
            wb.save(savepath)
            print('new workbook created at current directory.')
        elif os.path.isdir(savepath):
            savename = ''.join(['theAcct', str(accid), '-', get_time_str(), '.xlsx'])
            savepath = os.path.join(os.path.abspath(savepath), savename)
            wb = Workbook()
            wb.save(savepath)
            print('new workbook created at %s' % savepath)
        elif os.path.isfile(savepath):
            wb = load_workbook(savepath)
            print('workbook loaded at %s' % savepath)
        else:
            print('woc???,file not exist?')
            wb = Workbook()
            wb.save(savepath)
        wter = ExcelWriter(savepath, engine='openpyxl')
        wter.book = wb
        acct_data.to_excel(wter, sheet_name=''.join(['acct_', str(accid)]))
        if dr_acct_data is not None:
            dr_acct_data.to_excel(wter, sheet_name=''.join(['dr_', str(accid)]))
            wter.save()
        else:
            pass
        if cr_acct_data is not None:
            cr_acct_data.to_excel(wter, sheet_name=''.join(['cr_', str(accid)]))
            wter.save()
        else:
            pass
        wter.save()
        wb.close()
        print('%s data saved.' % str(accid))
    else:
        print('analysis result not saved.')
    print('---end %s analysis---' % str(accid))
    print(get_time_str())
    return [acct_data, dr_acct_data, cr_acct_data]
def write_xlsx(df, name_file):
    writer = ExcelWriter(f'{name_file}.xlsx')
    df.to_excel(writer, 'Sheet1')
    writer.save()
    return 'ФАЙЛ СОХРАНЕН'
    if (line.strip().find(word) != -1):
        col08.append(line[len(word) + 1:len(word) + 12])
    word = 'peakpower='
    if (line.strip().find(word) != -1):
        col09.append(line[len(word) + 1:len(word) + 12])

panda = pd.DataFrame(list(
    zip(col01, col02, col03, col04, col05, col06, col07, col08, col09)),
    columns=[
        'tpdf', 'tpdr', 'tpd', 'tcdf', 'tcd', 'tr', 'tf', 'avgpower',
        'peakpower'
    ])
panda_sliced = panda.drop(list([0]))
writer = ExcelWriter('../nor2_Pre-Sim.xlsx')
panda_sliced.to_excel(writer)  # can add sheets here if needed
writer.save()

os.chdir('/content/drive/My Drive/vlsi/Hspice/nor2/PLS/output')
col01 = []
col02 = []
col03 = []
col04 = []
col05 = []
col06 = []
col07 = []
col08 = []
col09 = []
i = 0
truetrue = 0
for i in range(2):
    truetrue += cmp[i][i]

i = 0
j = 0
falsefalse = 0
for i in range(2):
    for j in range(2):
        if i != j:
            falsefalse += cmp[i][j]

print('true=', np.round(truetrue, 2))
print('false=', np.round(falsefalse, 2))

sns.heatmap(cm, annot=True, cmap='Reds')
cmp = cmp / 100
fig, (ax1) = plt.subplots(1, sharey=True, figsize=(10, 10))
fig.suptitle('confusion matrix percentage')
sns.heatmap(cmp, annot=False, fmt='.2%', cmap='Blues')

# Export results to excel
df = pd.DataFrame(y_pred)
dfa = pd.DataFrame(y_pred_all)
writer = ExcelWriter('test2.xlsx')
df.to_excel(writer, 'Sheet1', index=False)
dfa.to_excel(writer, 'Sheet2', index=False)
writer.save()
if len(corArray) % 30 == 0:
    threshholdUpdate()
    thres_FB_UP = threshUpdate[-1]
    thres_FB = thres_FB_UP % 0.1

if currentTime - tWinHead > durWin and iTail > iHead:
    correlationAverage()
    print("thresh", thres_FB)
    if avgCorrelationArray[0] > thres_FB:
        # play()
        start_play()
        print(avgCorrelationArray)
    tWinHead = tWinHead + 1
    iHead = iTail + 1
    iTail = iHead

df = pd.DataFrame({'threshold': threshUpdate})
writer = ExcelWriter('xxx.xlsx')
df.to_excel(writer, 'sheet1', index=False)
writer.save()
def train(epoch):
    clf.train()  # set model in training mode (need this because of dropout)
    correct = 0
    train_loss = 0
    #weights = []
    #class_weights = torch.cuda.FloatTensor(weights)
    # dataset API gives us pythonic batching
    for batch_id, (data, label) in enumerate(train_loader):
        #print('len(train_data)', len(data))
        data = Variable(data).to('cuda')
        target = Variable(label).to('cuda')
        # forward pass, calculate loss and backprop!
        opt.zero_grad()
        output = clf(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        #loss_history.append(loss.data[0])
        opt.step()
        train_loss += loss.item()
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

    #train_loss = np.mean(loss_history)
    train_loss /= len(train_loader)
    train_accuracy = float(correct) / float(len(train_loader.dataset))
    print('\n{:d}, {:.4f}, {:.4f}, {}/{}'.format(epoch, train_loss, train_accuracy,
                                                 correct, len(train_loader.dataset)))
    a.append(epoch)
    b.append(train_loss)
    c.append(train_accuracy)

    # output to excel
    d = {'epoch': a, 'loss': b, 'accuracy': c}
    df = pd.DataFrame(d)
    writer = ExcelWriter('result_threeclass_320_4x_train_sn_3_.xlsx')
    df.to_excel(writer, 'Sheet1', index=False)

    # create chart
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    chart = workbook.add_chart({'type': 'line'})
    chart.add_series({
        'categories': ['Sheet1', 1, 0, epoch + 1, 0],
        'values': ['Sheet1', 1, 2, epoch + 1, 2],
    })
    chart.set_x_axis({'name': 'epoch', 'position_axis': 'on_tick'})
    chart.set_y_axis({
        'name': 'accuracy',
        'major_gridlines': {
            'visible': False
        }
    })
    worksheet.insert_chart('D1', chart)
    writer.save()
    # " a ":' ',' is ':' ',
}
df.replace(to_replace=replace_dict, inplace=True, regex=True)  # replace word
df.replace(to_replace=replace_dict, inplace=True, regex=True)  # replace word

# Filter by question words
what = df_ct = df[df['Questions'].str.contains('what')]
where = df_ct = df[df['Questions'].str.contains('where')]
who = df_ct = df[df['Questions'].str.contains('who')]
how = df_ct = df[df['Questions'].str.contains('how')]
which = df[df['Questions'].str.contains('which')]
when = df[df['Questions'].str.contains('when')]
Other = df[~df['Questions'].str.contains('what|where|who|how|which|when')]

# save in excel file
writer = ExcelWriter('outputFiles/questionWords TrainingSet.xlsx')
# writer = ExcelWriter('questionWords ValidationSet.xlsx')
# writer = ExcelWriter('Analysis2 TrainingSet.xlsx')
# writer = ExcelWriter('Analysis2 ValidationSet.xlsx')
df.to_excel(writer, 'all', index=False)
what.to_excel(writer, 'what', index=False)
where.to_excel(writer, 'where', index=False)
who.to_excel(writer, 'who', index=False)
which.to_excel(writer, 'which', index=False)
when.to_excel(writer, 'when', index=False)
Other.to_excel(writer, 'Other', index=False)
writer.save()

# filter
# df_mri = df[df['Questions'].str.contains('mri')]  # Only questions about mri
wb = pd.ExcelFile('Client Detail_2021_2H_concat.xlsx', engine='openpyxl')
# gets sheet names - works but also creates sheet named ' (200) Storage' that needs to be deleted
worksheets = wb.sheet_names
new_sheets = []
del worksheets[0]
# could probably use a list comprehension here
for sheet in worksheets:
    sheet_frame = pd.read_excel(wb, sheet, header=0)
    if 'concat' in sheet_frame.columns:
        new_sheets.append(sheet)

# gets clients from weekly pallet counts - works
wpc_df = pd.read_excel(wb, 'Weekly Pallet Counts')
clients = wpc_df['concat'].unique()

for client in clients:
    writer = ExcelWriter(f'{client}_Details_{dt_string}.xlsx')
    for sheet in new_sheets:
        sheet_frame = pd.read_excel(wb, sheet, header=0)
        if 'concat' in sheet_frame.columns:
            client_sheet = sheet_frame[sheet_frame['concat'] == client]
            if client_sheet.shape[0] == 0:
                continue
            # with pd.ExcelWriter(f'{client}_Details_{dt_string}.xlsx') as writer:  # pylint: disable=abstract-class-instantiated
            client_sheet.to_excel(writer, index=False, sheet_name=sheet)
        else:
            pass
    writer.save()
    'Metallica', 'Elvis Presley', 'Luke Bryan', 'Mitchell Tenpenny',
    'Zac Brown Band', 'Josh Groban', 'Dierks Bentley', 'Blake Shelton',
    'Eric Church', 'John Legend', 'Zedd', 'Normani', 'Flipp Dinero',
    'Migos', 'The Weeknd', 'Offset', 'Keith Urban', 'J Balvin',
    'Kelly Clarkson', 'Barbra Streisand', 'Gucci Mane', 'Rihanna',
    'Daddy Yankee', 'Old Dominion'
]

iter = 0
rows_list = []
while iter < len(artists):
    artist = artists[iter]
    query = "SELECT * FROM Tweets WHERE artistName ='" + artist + "'"
    cursor.execute(query)
    total_score = 0
    num_tweets = 0
    for (artistName, timestamp, content) in cursor:
        score = Score(content)
        total_score += score
        num_tweets += 1
    avg_score = total_score / num_tweets
    print('Score for ' + artist_names[iter] + ': ' + str(avg_score) + '\n')
    entry_dict = {}
    entry_dict['Artist'] = artist_names[iter]
    entry_dict['Sentiment Score'] = avg_score
    rows_list.append(entry_dict)
    iter += 1

sentiment = pd.DataFrame(rows_list)
writer = ExcelWriter('sentment.xlsx')
sentiment.to_excel(writer, 'Sheet1')
writer.save()
def sortCSVfile(): workbook = pd.ExcelFile('Compiled.xls') SummaryTable = pd.read_excel(workbook, 'CSV_summary') MeanCompiled = 'MeanCompiled.xls' meanHits = SummaryTable.groupby('Animal ID')['Hits'].mean() meanMisses = SummaryTable.groupby('Animal ID')['Misses'].mean() meanFA = SummaryTable.groupby('Animal ID')['False alarms'].mean() meanCR = SummaryTable.groupby('Animal ID')['Correct Rejections'].mean() meanISI = SummaryTable.groupby('Animal ID')['ISI touches'].mean() meanHR = SummaryTable.groupby('Animal ID')['Hit rate'].mean() meanFAR = SummaryTable.groupby('Animal ID')['False alarm rate'].mean() meanD = SummaryTable.groupby('Animal ID')['D-prime'].mean() meanC = SummaryTable.groupby('Animal ID')['Criterion'].mean() meanResponseLat = SummaryTable.groupby( 'Animal ID')['Mean Response Latency'].mean() meanHitLat = SummaryTable.groupby('Animal ID')['Mean Hit Latency'].mean() meanFAlat = SummaryTable.groupby( 'Animal ID')['Mean False Alarm Latency'].mean() meanRETlat = SummaryTable.groupby( 'Animal ID')['Mean Retrieval Latency'].mean() meanMagEntries = SummaryTable.groupby( 'Animal ID')['Magazine Entries'].mean() meanBIRBeam = SummaryTable.groupby('Animal ID')['Back Beam Breaks'].mean() meanFIRBeam = SummaryTable.groupby('Animal ID')['Front Beam Breaks'].mean() strategyTable = pd.read_excel(workbook, 'CSV_strategy') meanLatBetweenHits = strategyTable.groupby( 'Animal ID')['Latency Between Hits (MEAN)'].mean() stdevLatBetweenHits = strategyTable.groupby( 'Animal ID')['Latency Between Hits (STDEV)'].mean() maxLatBetweenHits = strategyTable.groupby( 'Animal ID')['Latency Between Hits (MAX)'].mean() meanLatBetweenResponses = strategyTable.groupby( 'Animal ID')['Latency Between Stimuli Responses (MEAN)'].mean() stdevLatBetweenResponses = strategyTable.groupby( 'Animal ID')['Latency Between Stimuli Responses (STDEV)'].mean() maxLatBetweenResponses = strategyTable.groupby( 'Animal ID')['Latency Between Stimuli Responses (MAX)'].mean() meanTrialsBetweenResponses = strategyTable.groupby( 'Animal ID')['Trials Between Stimuli Responses (MEAN)'].mean() stdevTrialsBetweenResponses = strategyTable.groupby( 'Animal ID')['Trials Between Stimuli Responses (STDEV)'].mean() maxTrialsBetweenResponses = strategyTable.groupby( 'Animal ID')['Trials Between Stimuli Responses (MAX)'].mean() meanFAboutLength = strategyTable.groupby( 'Animal ID')['False Alarm Bout Length (MEAN)'].mean() stdevFAboutLength = strategyTable.groupby( 'Animal ID')['False Alarm Bout Length (STDEV)'].mean() maxFAboutLength = strategyTable.groupby( 'Animal ID')['False Alarm Bout Length (MAX)'].mean() meanHitboutLength = strategyTable.groupby( 'Animal ID')['Hit Bout Length (MEAN)'].mean() stdevHitboutLength = strategyTable.groupby( 'Animal ID')['Hit Bout Length (STDEV)'].mean() maxHitboutLength = strategyTable.groupby( 'Animal ID')['Hit Bout Length (MAX)'].mean() mean_retrievalFrontBeam = strategyTable.groupby( 'Animal ID')['Latency Retrieval --> Front Beam Break (MEAN)'].mean() stdev_retrievalFrontBeam = strategyTable.groupby( 'Animal ID')['Latency Retrieval --> Front Beam Break (STDEV)'].mean() max_retrievalFrontBeam = strategyTable.groupby( 'Animal ID')['Latency Retrieval --> Front Beam Break (MAX)'].mean() hitBinTable = pd.read_excel(workbook, 'CSV_binsHR') hitsBin1 = hitBinTable.groupby('Animal ID')['Bin1'].mean() hitsBin2 = hitBinTable.groupby('Animal ID')['Bin2'].mean() hitsBin3 = hitBinTable.groupby('Animal ID')['Bin3'].mean() hitsBin4 = hitBinTable.groupby('Animal ID')['Bin4'].mean() hitsBin5 = hitBinTable.groupby('Animal 
ID')['Bin5'].mean() hitsBin6 = hitBinTable.groupby('Animal ID')['Bin6'].mean() hitsBin7 = hitBinTable.groupby('Animal ID')['Bin7'].mean() hitsBin8 = hitBinTable.groupby('Animal ID')['Bin8'].mean() hitsBin9 = hitBinTable.groupby('Animal ID')['Bin9'].mean() hitsBin10 = hitBinTable.groupby('Animal ID')['Bin10'].mean() hitsBin11 = hitBinTable.groupby('Animal ID')['Bin11'].mean() hitsBin12 = hitBinTable.groupby('Animal ID')['Bin12'].mean() FABinTable = pd.read_excel(workbook, 'CSV_binsFAR') FARBin1 = FABinTable.groupby('Animal ID')['Bin1'].mean() FARBin2 = FABinTable.groupby('Animal ID')['Bin2'].mean() FARBin3 = FABinTable.groupby('Animal ID')['Bin3'].mean() FARBin4 = FABinTable.groupby('Animal ID')['Bin4'].mean() FARBin5 = FABinTable.groupby('Animal ID')['Bin5'].mean() FARBin6 = FABinTable.groupby('Animal ID')['Bin6'].mean() FARBin7 = FABinTable.groupby('Animal ID')['Bin7'].mean() FARBin8 = FABinTable.groupby('Animal ID')['Bin8'].mean() FARBin9 = FABinTable.groupby('Animal ID')['Bin9'].mean() FARBin10 = FABinTable.groupby('Animal ID')['Bin10'].mean() FARBin11 = FABinTable.groupby('Animal ID')['Bin11'].mean() FARBin12 = FABinTable.groupby('Animal ID')['Bin12'].mean() DBinTable = pd.read_excel(workbook, 'CSV_binsD') DBin1 = DBinTable.groupby('Animal ID')['Bin1'].mean() DBin2 = DBinTable.groupby('Animal ID')['Bin2'].mean() DBin3 = DBinTable.groupby('Animal ID')['Bin3'].mean() DBin4 = DBinTable.groupby('Animal ID')['Bin4'].mean() DBin5 = DBinTable.groupby('Animal ID')['Bin5'].mean() DBin6 = DBinTable.groupby('Animal ID')['Bin6'].mean() DBin7 = DBinTable.groupby('Animal ID')['Bin7'].mean() DBin8 = DBinTable.groupby('Animal ID')['Bin8'].mean() DBin9 = DBinTable.groupby('Animal ID')['Bin9'].mean() DBin10 = DBinTable.groupby('Animal ID')['Bin10'].mean() DBin11 = DBinTable.groupby('Animal ID')['Bin11'].mean() DBin12 = DBinTable.groupby('Animal ID')['Bin12'].mean() CBinTable = pd.read_excel(workbook, 'CSV_binsC') CBin1 = CBinTable.groupby('Animal ID')['Bin1'].mean() CBin2 = CBinTable.groupby('Animal ID')['Bin2'].mean() CBin3 = CBinTable.groupby('Animal ID')['Bin3'].mean() CBin4 = CBinTable.groupby('Animal ID')['Bin4'].mean() CBin5 = CBinTable.groupby('Animal ID')['Bin5'].mean() CBin6 = CBinTable.groupby('Animal ID')['Bin6'].mean() CBin7 = CBinTable.groupby('Animal ID')['Bin7'].mean() CBin8 = CBinTable.groupby('Animal ID')['Bin8'].mean() CBin9 = CBinTable.groupby('Animal ID')['Bin9'].mean() CBin10 = CBinTable.groupby('Animal ID')['Bin10'].mean() CBin11 = CBinTable.groupby('Animal ID')['Bin11'].mean() CBin12 = CBinTable.groupby('Animal ID')['Bin12'].mean() ISIBinTable = pd.read_excel(workbook, 'CSV_binsISI') ISIBin1 = ISIBinTable.groupby('Animal ID')['Bin1'].mean() ISIBin2 = ISIBinTable.groupby('Animal ID')['Bin2'].mean() ISIBin3 = ISIBinTable.groupby('Animal ID')['Bin3'].mean() ISIBin4 = ISIBinTable.groupby('Animal ID')['Bin4'].mean() ISIBin5 = ISIBinTable.groupby('Animal ID')['Bin5'].mean() ISIBin6 = ISIBinTable.groupby('Animal ID')['Bin6'].mean() ISIBin7 = ISIBinTable.groupby('Animal ID')['Bin7'].mean() ISIBin8 = ISIBinTable.groupby('Animal ID')['Bin8'].mean() ISIBin9 = ISIBinTable.groupby('Animal ID')['Bin9'].mean() ISIBin10 = ISIBinTable.groupby('Animal ID')['Bin10'].mean() ISIBin11 = ISIBinTable.groupby('Animal ID')['Bin11'].mean() ISIBin12 = ISIBinTable.groupby('Animal ID')['Bin12'].mean() FARbyStimTable = pd.read_excel(workbook, 'CSV_FARbyStim') FARbyStim1 = FARbyStimTable.groupby('Animal ID')['FAR stimulus 1/2'].mean() FARbyStim2 = FARbyStimTable.groupby('Animal ID')['FAR stimulus 3'].mean() 
FARbyStim3 = FARbyStimTable.groupby('Animal ID')['FAR stimulus 4'].mean() FARbyStim4 = FARbyStimTable.groupby('Animal ID')['FAR stimulus 5'].mean() allSummary = (meanFA, meanCR, meanISI, meanHR, meanFAR, meanD, meanC, meanResponseLat, meanHitLat, meanFAlat, meanRETlat, meanMagEntries, meanBIRBeam, meanFIRBeam) allStrategy = (maxLatBetweenHits, meanLatBetweenResponses, stdevLatBetweenResponses, maxLatBetweenResponses, meanTrialsBetweenResponses, stdevTrialsBetweenResponses, maxTrialsBetweenResponses, meanFAboutLength, stdevFAboutLength, maxFAboutLength, meanHitboutLength, stdevHitboutLength, maxHitboutLength, mean_retrievalFrontBeam, stdev_retrievalFrontBeam, max_retrievalFrontBeam) allHR = (hitsBin3, hitsBin4, hitsBin5, hitsBin6, hitsBin7, hitsBin8, hitsBin9, hitsBin10, hitsBin11, hitsBin12) allFAR = (FARBin3, FARBin4, FARBin5, FARBin6, FARBin7, FARBin8, FARBin9, FARBin10, FARBin11, FARBin12) allD = (DBin3, DBin4, DBin5, DBin6, DBin7, DBin8, DBin9, DBin10, DBin11, DBin12) allC = (CBin3, CBin4, CBin5, CBin6, CBin7, CBin8, CBin9, CBin10, CBin11, CBin12) allISI = (ISIBin3, ISIBin4, ISIBin5, ISIBin6, ISIBin7, ISIBin8, ISIBin9, ISIBin10, ISIBin11, ISIBin12) allFARbyStim = (FARbyStim3, FARbyStim4) #SUMMARY TABLE meanHits = meanHits.to_frame().reset_index() summaryTable = meanHits.merge(meanMisses.to_frame(), left_on='Animal ID', right_index=True) for i in allSummary: summaryTable = summaryTable.merge(i.to_frame(), left_on='Animal ID', right_index=True) summaryTable.set_index('Animal ID', inplace=True) #STRATEGY TABLE meanLatBetweenHits = meanLatBetweenHits.to_frame().reset_index() stratTable = meanLatBetweenHits.merge(stdevLatBetweenHits.to_frame(), left_on='Animal ID', right_index=True) for i in allStrategy: stratTable = stratTable.merge(i.to_frame(), left_on='Animal ID', right_index=True) stratTable.set_index('Animal ID', inplace=True) #HIT TABLE hitsBin1 = hitsBin1.to_frame().reset_index() hitTable = hitsBin1.merge(hitsBin2.to_frame(), left_on='Animal ID', right_index=True) for i in allHR: hitTable = hitTable.merge(i.to_frame(), left_on='Animal ID', right_index=True) hitTable.set_index('Animal ID', inplace=True) #FAR TABLE FARBin1 = FARBin1.to_frame().reset_index() FARTable = FARBin1.merge(FARBin2.to_frame(), left_on='Animal ID', right_index=True) for i in allFAR: FARTable = FARTable.merge(i.to_frame(), left_on='Animal ID', right_index=True) FARTable.set_index('Animal ID', inplace=True) #D TABLE DBin1 = DBin1.to_frame().reset_index() DTable = DBin1.merge(DBin2.to_frame(), left_on='Animal ID', right_index=True) for i in allD: DTable = DTable.merge(i.to_frame(), left_on='Animal ID', right_index=True) DTable.set_index('Animal ID', inplace=True) #C TABLE CBin1 = CBin1.to_frame().reset_index() CTable = CBin1.merge(CBin2.to_frame(), left_on='Animal ID', right_index=True) for i in allC: CTable = CTable.merge(i.to_frame(), left_on='Animal ID', right_index=True) CTable.set_index('Animal ID', inplace=True) #ISI TABLE ISIBin1 = ISIBin1.to_frame().reset_index() ISITable = ISIBin1.merge(ISIBin2.to_frame(), left_on='Animal ID', right_index=True) for i in allISI: ISITable = ISITable.merge(i.to_frame(), left_on='Animal ID', right_index=True) ISITable.set_index('Animal ID', inplace=True) # FARbySTim TABLE FARbyStim1 = FARbyStim1.to_frame().reset_index() FStimTable = FARbyStim1.merge(FARbyStim2.to_frame(), left_on='Animal ID', right_index=True) for i in allFARbyStim: FStimTable = FStimTable.merge(i.to_frame(), left_on='Animal ID', right_index=True) FStimTable.set_index('Animal ID', inplace=True) list_dfs = 
(summaryTable, stratTable, hitTable, FARTable, DTable, CTable, ISITable, FStimTable) writer = ExcelWriter(MeanCompiled) nameList = (str('Summary'), str('Strategies'), str('Hit Bins'), str('False Alarm Bins'), str('D prime bins'), str('Criterion bins'), str('ISI bins'), str('FAR by non-target')) loop = 0 for n, df in enumerate(list_dfs): sheetName = (nameList[loop]) df.to_excel(writer, sheetName) loop += 1 writer.save()
def prepare_excel_file(mydict):
    with ExcelWriter("validation_excel.xlsx") as writer:
        for k, v in mydict.items():
            v.to_excel(writer, sheet_name=k)
    move_files('validation_excel.xlsx')
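# Hedged usage example for prepare_excel_file: each dict key becomes a sheet
# name and each value is a DataFrame written to that sheet. The data here is
# made up, and the move_files helper used above is assumed to be importable
# from the surrounding project.
import pandas as pd

validation_frames = {
    'train': pd.DataFrame({'loss': [0.9, 0.5, 0.3]}),
    'test': pd.DataFrame({'loss': [1.1, 0.7, 0.6]}),
}
prepare_excel_file(validation_frames)  # writes validation_excel.xlsx, then moves it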
error_pre = math.sqrt( (pi_1_3 * (1 - pi_1_3)) * (1 / (n_1 + n_3) + 1 / n_2)) zscore = (p_1 - p_2) / Decimal(error) zscore_pre = (pi_1_3 - p_2) / Decimal(error_pre) pval = st.norm.sf(abs(float(zscore))) * 2 pval_pre = st.norm.sf(abs(float(zscore_pre))) * 2 significant = 0 significant_pre = 0 if pval < alpha: significant = 1 if pval_pre < alpha: significant_pre = 1 #------- Confidence interval for 95%, 98% or 99% ------- ci_lower = (p_1 - p_2) + Decimal(z_value * error) ci_upper = (p_1 - p_2) - Decimal(z_value * error) ci_lower_pre = (pi_1_3 - p_2) + Decimal(z_value * error_pre) ci_upper_pre = (pi_1_3 - p_2) - Decimal(z_value * error_pre) #------- Write data to data frame ------- df.loc[len(df)] = [ Decimal(x), pi_1, pi_2, pi_3, n_1, n_2, n_3, error, error_pre, zscore, zscore_pre, pval, pval_pre, effectsize, effectsize_pre, significant, significant_pre, ci_lower, ci_upper, ci_lower_pre, ci_upper_pre ] writer = ExcelWriter('df.xlsx') df.to_excel(writer, 'Sheet1') writer.save()
def main(func=lambda x: x, mode=""): for feature_set in FEATURES_FILES: print("CURR feature_set:", feature_set) full_features_amnt = feature_set[1] - IGNORED_COLS_NUM - 2 n_features_list = [full_features_amnt] if full_features_amnt < 100: n_features_list += list(range(30, full_features_amnt, 15)) else: n_features_list += [50] n_features_list += list(range(100, min(750, full_features_amnt), 150)) + \ list(range(1000, min(6000, full_features_amnt), 1500)) for n_features in n_features_list: print("CURR n_features:", n_features) videos_features, videos_labels = get_videos_features_labels( feature_set, n_features, transform=func) models = [] # models += [(RandomForestClassifier(n_estimators=70, random_state=1), "RF_70_estimators")] # Random Forest models += [(RandomForestClassifier(n_estimators=i, max_depth=j), RF_MODEL_NAME + "_%d_trees_%d_depth" % (i, j)) for i in range(50, 250, 50) for j in range(6, 16, 3)] # Random Forest if mode not in ["binary", "change"]: if feature_set == FEATURES_FILES[0]: models += [(make_pipeline(PolynomialFeatures(i), linear_model.Ridge()), POL_MODEL_NAME + "_%d_degree" % i) for i in range(1, 4)] else: models += [(make_pipeline(PolynomialFeatures(i), linear_model.Ridge()), POL_MODEL_NAME + "_%d_degree" % i) for i in range(1, 2)] models += [(svm.LinearSVC(max_iter=2000), SVM_MODEL_NAME)] # SVM perform_general_learning( videos_features, videos_labels, models, "%s_%d" % (feature_set[0][:-4], n_features), mode) perform_within_sub_learning( videos_features, videos_labels, models, "%s_%d" % (feature_set[0][:-4], n_features), mode) # saving all the data frames if mode != "": mode += "_" within_test_size_output = concat(WITHIN_TEST_SIZE, sort=False).transpose() within_test_size_output.to_csv( path.join(OUTPUT_PATH, "%swithin_subject_test_sizes.csv" % mode)) within_test_vids_output = concat(WITHIN_TEST_VIDS, sort=False) within_test_vids_output.to_csv( path.join(OUTPUT_PATH, "%swithin_subject_test_videos.csv" % mode)) for df, name in [ (DataFrame(GENERAL_CHANCE), "%sgeneral_learning_chance_level.csv" % mode), (DataFrame(WITHIN_CHANCE), "%swithin_subject_chance_level.csv" % mode), (within_test_size_output, "%swithin_subject_test_sizes.csv" % mode) ]: df.to_csv(path.join(OUTPUT_PATH, name)) for data, paradigm in [ (EA_GENERAL_RESULTS, "%sgeneral_learning_ea" % mode), (EA_WITHIN_RESULTS, "%swithin_subject_ea" % mode), (FULL_GENERAL_RESULTS, "%sgeneral_learning_full" % mode), (FULL_WITHIN_RESULTS, "%swithin_subject_full" % mode) ]: with ExcelWriter(path.join(OUTPUT_PATH, paradigm + "_results.xlsx")) as writer: save_results(writer, data, "results") with ExcelWriter(path.join(OUTPUT_PATH, '%sall_models_results.xlsx' % mode)) as writer: general_results = save_all_models(writer, ALL_MODELS_GENERAL_RESULTS, "general") within_results = save_all_models(writer, ALL_MODELS_WITHIN_RESULTS, "within") with ExcelWriter(path.join(OUTPUT_PATH, '%sall_models_ea.xlsx' % mode)) as writer: general_ea = save_all_models(writer, ALL_MODELS_GENERAL_EA, "general") within_ea = save_all_models(writer, ALL_MODELS_WITHIN_EA, "within") with ExcelWriter( path.join(OUTPUT_PATH, '%sall_models_train_results.xlsx' % mode)) as writer: general_train = save_all_models(writer, ALL_MODELS_GENERAL_TRAIN, "general") # plots train-test trade-off plot_train_test(general_results, general_train, mode) # calculate the accuracy and EA correlation accuracy_ea_correlation(mode, general_results, general_ea, within_results, within_ea) general_test = concat(ALL_MODELS_GENERAL_TEST, sort=False).transpose() general_test.to_csv( 
path.join(OUTPUT_PATH, "%sall_models_test_results.csv" % mode)) general_test = general_test.values.flatten() general_ea_test = concat(ALL_MODELS_GENERAL_EA_TEST, sort=False).transpose().values.flatten() best_acc = np.nanargmax(general_results.values) best_ea = np.nanargmax(general_ea.values) best_acc_model_name = general_results.index[best_acc // len(general_results.columns)] best_ea_model_name = general_ea.index[best_ea // len(general_ea.columns)] df = DataFrame({ "best_acc_model (%s)" % best_acc_model_name: { "accuracy": general_test[best_acc], "ea": general_ea_test[best_acc] }, "best_ea_model (%s)" % best_ea_model_name: { "accuracy": general_test[best_ea], "ea": general_ea_test[best_ea] } }) df.to_csv(path.join(OUTPUT_PATH, "%stest_results.csv" % mode)) print("--------------TEST-------------") print(df) print("--------------TEST-------------")
def output_report( filename, subject_enrollments_by_date, search_enrollments_by_date, subject_clicks_by_date, search_clicks_by_date, total_course_card_clicks_by_date, total_program_course_cards_by_date, total_homepage_views=None, total_course_card_clicks=None, total_program_card_clicks=None, featured_cards=None, homepage_subjects=None): writer = ExcelWriter(filename,engine='xlsxwriter') # Get access to the workbook workbook = writer.book # Set the formats needed for the report money_fmt = workbook.add_format({'num_format': '$#,##0', 'bold': True}) percent_fmt = workbook.add_format({'num_format': '0.0%', 'bold': False}) comma_fmt = workbook.add_format({'num_format': '#,##0', 'bold': False}) date_fmt = workbook.add_format({'num_format': 'dd/mm/yy'}) cell_format = workbook.add_format({'bold': True, 'italic': False}) merge_format = workbook.add_format( { 'bold': 1, 'align': 'center', 'valign': 'vcenter', } ) # Create the homepage courses featured_cards_worksheet if featured_cards is not None: total_search_clicks = int(search_clicks_by_date['uniqueClicks'].sum()) total_subject_clicks = int(subject_clicks_by_date['uniqueClicks'].sum()) total_search_enrollments = int(search_enrollments_by_date['uniqueEnrollments'].sum()) total_subject_enrollments = int(subject_enrollments_by_date['uniqueEnrollments'].sum()) search_enrollment_conversion_rate = float(total_search_enrollments) / total_homepage_views subject_enrollment_conversion_rate = float(total_subject_enrollments) / total_homepage_views # The total course card clicks and total program card clicks values are off so use these instead all_course_cards_clicks = int(total_course_card_clicks_by_date['uniqueClicks'].sum()) all_program_course_cards_clicks = int(total_program_course_cards_by_date['uniqueClicks'].sum()) total_clicks = all_course_cards_clicks + all_program_course_cards_clicks + total_search_clicks + total_subject_clicks total_enrolls = int(featured_cards['uniqueEnrolls'].sum()) + total_search_enrollments + total_subject_enrollments featured_cards.to_excel(writer, index=False, sheet_name='Featured Card Report', startrow=18) featured_cards_worksheet = writer.sheets['Featured Card Report'] # Set column width and formatting featured_cards_worksheet.set_column('A:A', 60) featured_cards_worksheet.set_column('D:D', 15, comma_fmt) featured_cards_worksheet.set_column('E:E', 15, percent_fmt) featured_cards_worksheet.set_column('F:H', 15, comma_fmt) featured_cards_worksheet.set_column('I:O', 15, percent_fmt) # Write headings featured_cards_worksheet.write( 'A1', 'Homepage Course Enrollments, Data from {start} to {end}'.format(start=start_date, end=end_date), cell_format ) featured_cards_worksheet.write('A3', 'Overview', cell_format) featured_cards_worksheet.write('A4', 'Total Homepage Views:', cell_format) featured_cards_worksheet.write('A6', 'Total Clicks on Home Page:', cell_format) featured_cards_worksheet.write('A7', ' feat. course clicks', cell_format) featured_cards_worksheet.write('A8', ' feat. program clicks', cell_format) featured_cards_worksheet.write('A9', ' feat. search clicks', cell_format) featured_cards_worksheet.write('A10', ' feat. 
subject clicks', cell_format) featured_cards_worksheet.write('A11', 'Total CTR', cell_format) featured_cards_worksheet.write('C12', 'card conversion', cell_format) featured_cards_worksheet.write('A13', 'Total Enrollments from card clicks:', cell_format) featured_cards_worksheet.write('A14', ' enrollment on course about (top+bottom)', cell_format) featured_cards_worksheet.write('A15', ' enrolllment on program about', cell_format) featured_cards_worksheet.write('A16', ' enrollment on search', cell_format) featured_cards_worksheet.write('A17', ' enrollment on subject card clicks', cell_format) featured_cards_worksheet.write('A18', 'Top Performing Cards + Conversion', cell_format) featured_cards_worksheet.merge_range('F18:H18', 'enrollment events from card click', merge_format) featured_cards_worksheet.merge_range('I18:K18', 'conversion from card click', merge_format) featured_cards_worksheet.merge_range('L18:M18', 'clickshare vs. other cards', merge_format) featured_cards_worksheet.merge_range('N18:O18', 'enrollments per impression', merge_format) # Write Overview Data featured_cards_worksheet.write('B4', total_homepage_views, comma_fmt) featured_cards_worksheet.write('B6', int(total_clicks), comma_fmt) featured_cards_worksheet.write('B7', all_course_cards_clicks, comma_fmt) featured_cards_worksheet.write('B8', all_program_course_cards_clicks, comma_fmt) featured_cards_worksheet.write('B9', total_search_clicks, comma_fmt) featured_cards_worksheet.write('B10', total_subject_clicks, comma_fmt) featured_cards_worksheet.write('B11', float(total_clicks)/total_homepage_views, percent_fmt) featured_cards_worksheet.write('B13', int(total_enrolls), comma_fmt) featured_cards_worksheet.write('B14', int(featured_cards['uniqueCourseEnrolls'].sum()), comma_fmt) featured_cards_worksheet.write('B15', int(featured_cards['uniqueProgramEnrolls'].sum()), comma_fmt) featured_cards_worksheet.write('B16', total_search_enrollments, comma_fmt) featured_cards_worksheet.write('B17', total_subject_enrollments, comma_fmt) featured_cards_worksheet.write('C13', float(total_enrolls)/total_homepage_views, percent_fmt) featured_cards_worksheet.write('C14', float(featured_cards['uniqueCourseEnrolls'].sum()) / total_homepage_views, percent_fmt) featured_cards_worksheet.write('C15', float(featured_cards['uniqueProgramEnrolls'].sum()) / total_homepage_views, percent_fmt) featured_cards_worksheet.write('C16', search_enrollment_conversion_rate, percent_fmt) featured_cards_worksheet.write('C17', subject_enrollment_conversion_rate, percent_fmt) if homepage_subjects is not None: homepage_subjects.to_excel(writer, index=False, sheet_name='HomepageSubjects', startrow=2) # Get the homepage subject worksheet homepage_subject_worksheet = writer.sheets['HomepageSubjects'] # Set conditional format homepage_subject_worksheet.conditional_format('C1:C1000', {'type': '3_color_scale'}) # Set column width and formatting homepage_subject_worksheet.set_column('A:A', 27) homepage_subject_worksheet.set_column('B:B', 15, comma_fmt) homepage_subject_worksheet.set_column('C:S', 15, percent_fmt) # Write heading homepage_subject_worksheet.write('A1', 'Top Subject Pages from the Homepage, Data from '+str(start_date)+' to '+str(end_date) , cell_format) # Write out the .xlsx file writer.save()
            dt.date.today().strftime("%Y%m%d"))), action='store')

    args = parser.parse_args()
    if args.duplication_type == "RPA" and args.rpa_file is None:
        raise argparse.ArgumentTypeError(
            "no RPA file specified for duplication type of {0}".format(
                args.duplication_type))
    if args.duplication_type == 'RPA':
        print("evaluating RPA inventory to ETA submissions worklist")
    elif args.duplication_type == "COLLAB":
        print("evaluating ETA submissions worklist against itself")
    evaluator = de.DupEvaluator(args)
    dup_df = evaluator.dup_eval()
    print("{0:d} duplicates found".format(dup_df.shape[0]))
    print("saving duplicates to {0}".format(args.output_file))
    with ExcelWriter(os.fspath(args.output_file)) as writer:
        dup_df.to_excel(writer, index=False)
except Exception as e:
    print(e, file=sys.stdout)
    rv += 1
finally:
    print("end {0} (elapsed time: {1})".format(parser.prog,
                                               dt.datetime.now() - startdt))
    sys.exit(rv)
import pandas as pd
from pandas import ExcelWriter
import os
#import itertools
import numpy as np

# raw string so the backslashes in the Windows path are not treated as escapes
os.chdir(r'C:\Users\nauga\Google Drive\BuildingPrognostics\ForecastingTrainingData')

df = pd.ExcelFile('JunJulyCleanedData2.xlsx').parse('Sheet1')
df.replace('', np.nan, inplace=True)
#newpd = pd.DataFrame(df.iloc[0::6, 0::])###
#print newpd
newpd = df[np.isfinite(df['Hourly Totalization.Hourly Totals Trend ()'])]

writer = ExcelWriter('JunJulyFinalData.xlsx')
newpd.to_excel(writer, 'Sheet1')
writer.save()

#newpd = df[np.isfinite(df['Hourly Totalization.Hourly Totals Trend ()'])]
#df = pd.ExcelFile('reqData.xlsx').parse('Sheet3')
#df.replace('', np.nan, inplace=True)
#print df
dcr['sterile'] = dcr.doctor_synopsis.apply(lambda x: exist(x, '不孕不育'))
dcr['digestion'] = dcr.doctor_synopsis.apply(lambda x: exist(x, '肠胃'))
dcr['digestion_2'] = dcr.doctor_synopsis.apply(lambda x: exist(x, '消化'))
dcr['urinary'] = dcr.doctor_synopsis.apply(lambda x: exist(x, '泌尿'))

synopsis_extract_list = [hco, dept, dcr]
save_xls(synopsis_extract_list, 'synopsis_extract.xlsx')

# Ophthalmology departments (眼科)
d0 = dcr.loc[dcr.dept.str.contains('眼科', na=False)]
# Fundus disease (眼底病)
d1 = d0[d0['doctor_skill'].str.contains('眼底病', na=False)]
d2 = d0[d0['doctor_synopsis'].str.contains('眼底病', na=False)]
d3 = d1.append(d2).drop_duplicates()
writer = ExcelWriter('ophthalmology.xlsx')
d3.to_excel(writer, 'Sheet1')
writer.save()

# Doctors with an expertise in infertility (不孕不育)
d4 = dcr[dcr.doctor_skill.str.contains('不孕不育', na=False)]
d5 = dcr[dcr.doctor_synopsis.str.contains('不孕不育', na=False)]
d5 = d4.append(d5).drop_duplicates()
writer = ExcelWriter('sterile.xlsx')
d5.to_excel(writer, 'Sheet1')
writer.save()

# Notebook-style inspection of the infertility subset
len(d5)
len(d5.hospital.unique())
len(d5.groupby(['hospital', 'dept']).count())
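# save_xls is not defined in the snippet above; a minimal sketch of what it
# likely does (one DataFrame per sheet in a single workbook) could look like
# this. The generic sheet-naming scheme is an assumption.
from pandas import ExcelWriter

def save_xls(df_list, xls_path):
    with ExcelWriter(xls_path) as writer:
        for n, df in enumerate(df_list):
            df.to_excel(writer, sheet_name='sheet%d' % n)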
import os, pickle
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import numpy as np

in_path = os.path.join(os.pardir, "aspects", "new_aspects.pkl")
relations_file = open(in_path, 'rb')
relations = pickle.load(relations_file)
relations_file.close()

keywords = set()
for topics in relations.values():
    for sets in topics.values():
        for _, relation, _, _, _ in sets:
            for word in relation.split():
                keywords.add(word)
print(len(keywords))

# out_path = os.path.join(os.pardir, "aspects", "keywords.txt")
# with open(out_path, 'a') as f:
#     for w in keywords:
#         f.write(w+"\n")

df = pd.DataFrame({'keywords': list(keywords)})
out_path = os.path.join(os.pardir, "aspects", "keywords.xlsx")
writer = ExcelWriter(out_path)
df.to_excel(writer, 'Sheet1', index=False)
writer.save()
def time_write_excel(self, engine):
    bio = BytesIO()
    bio.seek(0)
    writer = ExcelWriter(bio, engine=engine)
    self.df.to_excel(writer, sheet_name="Sheet1")
    writer.save()
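# time_write_excel reads like a benchmark method (it expects self.df plus an
# engine parameter). A minimal, hypothetical harness for timing it by hand
# with two common engines might look like this; the class name, setup data,
# and repetition count are assumptions, and writer.save() matches the older
# pandas API used throughout these snippets.
import timeit
from io import BytesIO

import numpy as np
import pandas as pd
from pandas import ExcelWriter

class WriteExcelBench:
    def setup(self):
        self.df = pd.DataFrame(np.random.randn(1000, 10))

    def time_write_excel(self, engine):
        bio = BytesIO()
        writer = ExcelWriter(bio, engine=engine)
        self.df.to_excel(writer, sheet_name="Sheet1")
        writer.save()

bench = WriteExcelBench()
bench.setup()
for engine in ("openpyxl", "xlsxwriter"):
    secs = timeit.timeit(lambda: bench.time_write_excel(engine), number=5)
    print(engine, secs)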
def ExcelWrite(yourdf, file, sheet):
    writer = ExcelWriter(file)
    yourdf.to_excel(writer, sheet)
    writer.save()
    c.append('Gujarat')
if value2.endswith('(UT)'):
    a.append(key2)
    c.append('Uttarakhand')
if value2.endswith('(RJ)'):
    a.append(key2)
    c.append('Rajasthan')
if value2.endswith('(SK)'):
    a.append(key2)
    c.append('Sikkim')
if value2.endswith('(WB)'):
    a.append(key2)
    c.append('West Bengal')
if value2.endswith('(CG)'):
    a.append(key2)
    c.append('Chhattisgarh')
if value2.endswith('(TG)'):
    a.append(key2)
    c.append('Telangana')

s = dict(zip(a, c))
f = {'Cadre': s}
b.update(f)
print b

df = pd.DataFrame(b)
writer = ExcelWriter('(final)appendixc2.xlsx', engine='xlsxwriter')
df.to_excel(writer, 'Sheet1', index=False)
writer.save()
    # Check the search results
    elem = driver.find_element_by_class_name('results')
    div_list = elem.find_elements_by_tag_name('div')

    # Scrape every search result into a list
    results = []
    for div in div_list:
        results.append(div.text)
        a_tags = div.find_elements_by_tag_name('a')
        if a_tags:
            for a_tag in a_tags:
                link = a_tag.get_attribute('href')
                results.append(link)

    # Split the collected results into chunks of 12 and store them as a new list
    result = [
        results[i * 12:(i + 1) * 12]
        for i in range((len(results) + 12 - 1) // 12)
    ]

    df = DataFrame(data=result)
    filePath = f'Exam-{idx}.xlsx'
    with ExcelWriter(filePath) as writer:
        df.to_excel(writer, index=False)

    # Print the result
    print(result)
except Exception as e:
    # Print the error if one occurred in the code above
    print(e)
finally:
    # Runs regardless of errors; quit the Chrome driver
    pass
    # driver.quit()
class ImpactTableGenerator: def __init__(self, config_file, input_filename=None, output_filename_base=None, output_directory=None): # create parameters self.__dict__.update(utils.get_config(config_file)) # Load list of locations self.locations_df = self._load_locations() # load spacy model logger.info("Loading model {}".format(self.model)) self.nlp = spacy.load(self.model) # get df of articles self.articles_df = self._load_articles(input_filename) # get keywords self.keywords = ImpactTableGenerator._get_keywords(config_file) # prepare output if output_filename_base is None: output_filename_base = 'impact_data_{keyword}_{country}'.format( keyword=self.keyword, country=self.country) self.output_filename_base = output_filename_base if output_directory is None: self.output_directory = OUTPUT_DIRECTORY else: self.output_directory = output_directory if not os.path.exists(self.output_directory): os.makedirs(self.output_directory) self.writer = ExcelWriter( os.path.join(self.output_directory, self.output_filename_base + '.xlsx')) self.df_impact = ImpactTableGenerator._make_df_impact() def loop_over_articles(self): n_articles = len(self.articles_df) for id_row in range(n_articles): logger.info("Analyzing article {}/{}...".format( id_row + 1, n_articles)) article = Article.Article(self.articles_df.iloc[id_row], self.language, self.keywords, self.nlp, self.locations_df) article.analyze(self.language, self.keywords, self.df_impact) logger.info("...finished article {}/{}, updating file\n".format( id_row + 1, n_articles)) if not self.df_impact.empty: self.df_impact.to_csv(os.path.join( self.output_directory, self.output_filename_base + '.csv'), mode='w', encoding='utf-8', sep='|') self.df_impact.to_excel(self.writer, sheet_name='Sheet1') self.writer.save() logger.info('found {} entries'.format(len(self.df_impact))) self.df_impact.dropna(how='all', inplace=True) logger.info('{}'.format(self.df_impact.describe())) logger.info('{}'.format(self.df_impact.head())) self.df_impact.to_csv(os.path.join(self.output_directory, self.output_filename_base + '.csv'), mode='w', encoding='utf-8', sep='|') self.df_impact.to_excel(self.writer, sheet_name='Sheet1') self.writer.save() @staticmethod def _make_df_impact(): levels = [[], [], []] codes = [[], [], []] names = ['location', 'date', 'article_num'] columns = [ 'damage_livelihood', 'damage_general', 'people_affected', 'people_dead', 'houses_affected', 'livelihood_affected', 'infrastructures_affected', 'infrastructures_mentioned', 'sentence(s)', 'article_title' ] return pd.DataFrame(index=pd.MultiIndex(levels=levels, codes=codes, names=names), columns=columns) def _load_articles(self, input_filename): # load DataFrame with articles input_directory = utils.INPSECTED_ARTICLES_OUTPUT_DIR if input_filename is None: input_filename = utils.get_inspected_articles_output_filename({ 'keyword': self.keyword, 'country': self.country }) df = pd.read_csv(os.path.join(input_directory, input_filename), sep='|').drop_duplicates(['title', 'text'], keep=False) df['publish_date'] = df['publish_date'].apply(pd.to_datetime) logger.info('got {} articles:'.format(len(df))) logger.info('{} -- {}'.format( df['publish_date'].min().strftime('%Y-%m-%d'), df['publish_date'].min().strftime('%Y-%m-%d'))) return df @staticmethod def _get_keywords(config_file): keywords_config = utils.get_keywords(config_file) keyword_list = [ 'donation', 'type_livelihood', 'type_people_multiple', 'type_people_death', 'list_verb_death', 'type_house', 'local_currency_names_short', 'currency_short', 
'local_currency_code', 'local_currency_names_long', 'currency_long', 'titles' ] keywords = { keyword: ast.literal_eval(keywords_config[keyword]) for keyword in keyword_list } keywords['type_people'] = utils.read_keyword_csv( keywords_config['filename_type_people']) keywords['type_infrastructure'] = utils.read_keyword_csv( keywords_config['filename_type_infrastructures']) keywords['currency_short'] = keywords[ 'local_currency_names_short'] + keywords['currency_short'] keywords['currency_long'] = keywords[ 'local_currency_names_long'] + keywords['currency_long'] return keywords def _load_locations(self): """ build a dictionary of locations {name: coordinates} from a gazetteer in tab-separated csv format (http://geonames.nga.mil/gns/html/namefiles.html) """ input_file = os.path.join(LOCATIONS_FOLDER, self.country, self.country_short + '_administrative_a.txt') columns = ['FULL_NAME_RO', 'FULL_NAME_ND_RO', 'LAT', 'LONG', 'ADM1'] locations_df = pd.read_csv(input_file, sep='\t', encoding='utf-8', usecols=columns) input_file = os.path.join(LOCATIONS_FOLDER, self.country, self.country_short + '_localities_l.txt') locations_df = locations_df.append(pd.read_csv(input_file, sep='\t', encoding='utf-8', usecols=columns), ignore_index=True) input_file = os.path.join( LOCATIONS_FOLDER, self.country, self.country_short + '_populatedplaces_p.txt') locations_df = locations_df.append(pd.read_csv(input_file, sep='\t', encoding='utf-8', usecols=columns), ignore_index=True) locations_df = locations_df[~locations_df['FULL_NAME_ND_RO'].str. contains(self.country)] locations_df["ADM1"] = pd.to_numeric(locations_df["ADM1"], errors='coerce') return locations_df
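# Hedged usage sketch for ImpactTableGenerator: the config filename is an
# assumption, and the keyword/location/article inputs the class expects are
# assumed to be in place as described by the class above.
if __name__ == '__main__':
    generator = ImpactTableGenerator('config.cfg')
    generator.loop_over_articles()  # writes <output_filename_base>.xlsx and .csv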