def getsheets(filepath):
    """Convert *filepath* (xlsx) to CSV via xlsx2csv and print the rows.

    Requires the ``xlsx2csv`` package (``pip install xlsx2csv``).
    Writes Orders.csv and Orders_rank.csv into the working directory.
    """
    Xlsx2csv(filepath, outputencoding="utf-8").convert("Orders.csv")
    Xlsx2csv(filepath, outputencoding="utf-8").convert("Orders_rank.csv")
    data = 'Orders.csv'
    # Checking the CSV content
    csv_data = pd.read_csv(data, delimiter=',')
    # BUG FIX: iterating a DataFrame directly yields column *names*, not
    # data rows; itertuples() actually walks the records.
    for row in csv_data.itertuples(index=False):
        print(row)
def xlsx2csv(filename_xlsx: str, filename_csv: str):
    """Convert an xlsx workbook — local path or http(s) URL — to a CSV file."""
    is_remote = filename_xlsx.startswith(("https://", "http://"))
    if is_remote:
        # Download the remote workbook into a temp file before converting.
        with tempfile.NamedTemporaryFile() as tmp:
            download_file_from_url(filename_xlsx, tmp.name)
            Xlsx2csv(tmp.name, outputencoding="utf-8").convert(filename_csv)
    else:
        Xlsx2csv(filename_xlsx, outputencoding="utf-8").convert(filename_csv)
def xlsx(self):
    """Yield rows from sheet 1 of ``self.path`` as tab-delimited CSV records.

    A workbook with more than one sheet is recorded as a submission error
    (but conversion still proceeds). A missing sheet is logged and the
    generator simply ends.
    """
    kwargs = {
        'delimiter': '\t',
        'skip_empty_lines': True,
        'outputencoding': 'utf-8',
        'hyperlinks': True,
    }
    sheetid = 1
    xlsx2csv = Xlsx2csv(self.path.as_posix(), **kwargs)
    ns = len(xlsx2csv.workbook.sheets)
    if ns > 1:
        message = f'too many sheets ({ns}) in {self.path.as_posix()!r}'
        self.addError(exc.EncodingError(message),
                      blame='submission',
                      path=self.path)
        logd.error(message)
    f = io.StringIO()
    try:
        xlsx2csv.convert(f, sheetid)
        f.seek(0)
        gen = csv.reader(f, delimiter='\t')
        yield from gen
    except SheetNotFoundException as e:
        # BUG FIX: missing space after 'in' in the log message
        # (the sibling xlsx1 method already formats it with the space).
        log.warning(f'Sheet weirdness in {self.path}')
        log.warning(str(e))
def ex_csv():
    """Convert every *.xlsx in the CWD to CSV, driving a Tk progress bar.

    Appends found files to the module-level `filelist`, writes the list to
    'lista archivos.txt', converts each workbook best-effort, then calls
    `refresh()`.
    """
    for files in glob.glob('*.xlsx'):
        filelist.append(files)
    progbar = ttk.Progressbar(formulario, variable=count,
                              maximum=len(filelist))
    progbar.place(x=130, y=280)
    lista = pd.DataFrame(filelist)
    lista.to_csv(path + 'lista archivos.txt', header=False)
    for counter, fileitem in enumerate(filelist):
        csv = fileitem.replace('.xlsx', '.csv')
        try:
            Xlsx2csv(path + fileitem,
                     outputencoding='utf-8').convert(path + csv)
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallows
            # KeyboardInterrupt/SystemExit. Keep the deliberate
            # best-effort behavior, but only for ordinary errors.
            pass
        count.set(counter + 1)
        formulario.update_idletasks()
    refresh()
def meta_extract(file):
    """Extract the '^Data' sheets of datasets/<file> into META_FILES1/Data/."""
    ticker = fileTicker(file)
    start = process_time()
    source = "datasets/" + str(file)
    target = "META_FILES1/Data/" + str(ticker)
    converter = Xlsx2csv(source, outputencoding="utf-8",
                         include_sheet_pattern="^Data")
    # sheetid=0 converts every sheet matching the pattern.
    converter.convert(target, sheetid=0)
    finish = process_time()
    print("Done Meta Extraction !!")
    print("Time take by " + str(file) + " is : ", (finish - start), "sec")
    print("******************************************************************************")
def meta_extract(file):
    """Extract the model sheets of datasets/<file> into META_FILES1/<ticker>."""
    ticker = fileTicker(file)
    source = "datasets/" + str(file)
    target = "META_FILES1/" + str(ticker)
    # Same three conversions as before, one per sheet-name pattern; the
    # second pattern covers the misspelled 'Emperical' variant found in
    # some workbooks.
    for pattern in ("^Empirical Model",
                    "^Emperical Model",
                    "^Regression Model"):
        Xlsx2csv(source,
                 outputencoding="utf-8",
                 include_sheet_pattern=pattern).convert(target, sheetid=0)
def __convert_xlsx2csv__(root_dir):
    """Recursively convert every file under *root_dir* to CSV.

    Output goes into a 'converted' subfolder next to each source file.
    Encoding and delimiter are set here; sheetid=0 means every sheet in
    the workbook is converted, otherwise pass the wanted sheet number.
    """
    for root, dirs, files in os.walk(root_dir):
        for name in files:
            fullname = os.path.join(root, name)
            if os.path.isfile(fullname):
                # BUG FIX: the pattern '.xlsx' had an unescaped dot and no
                # anchor, so it matched any character followed by 'xlsx'
                # anywhere in the name; anchor to the real extension.
                outname = os.path.join(root, 'converted',
                                       re.sub(r'\.xlsx$', '.csv', name))
                Xlsx2csv(fullname, outputencoding="utf-8",
                         delimiter='#').convert(outname, sheetid=0)
def process_roster_file_upload(file, section):
    """Save an uploaded roster workbook, convert it to CSV, and load it."""
    # Guard clause: silently do nothing for missing/disallowed files,
    # exactly as the original nested-if version did.
    if not (file and allowed_file(file.filename)):
        return
    filename = secure_filename(file.filename)
    uploaded_filename = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    csv_filename = filename_prefix(uploaded_filename) + '.' + 'csv'
    file.save(uploaded_filename)
    Xlsx2csv(uploaded_filename, outputencoding="utf-8").convert(csv_filename)
    rdb = RosterToDb(section.id, csv_filename)
    roster = rdb.roster
def xlsx_to_csv(self, file_names):
    """Convert an uploaded xlsx file to CSV and return the CSV file name.

    :param file_names: xlsx file name (may contain dots in the stem)
    :return: the same base name with a .csv extension
    """
    # BUG FIX: split('.')[0] truncated names like 'report.v2.xlsx' to
    # 'report'; splitext strips only the final extension.
    file_name = os.path.splitext(file_names)[0]
    xlsx_path = os.path.join(upload_folder, file_names)
    csv_path = os.path.join(upload_folder, file_name + '.csv')
    Xlsx2csv(xlsx_path, outputencoding="utf-8").convert(csv_path)
    return file_name + '.csv'
def csvfmxlsx(xlsxfl, lst):
    """Create one CSV per listed sheet in a 'csv' folder next to *xlsxfl*.

    :param xlsxfl: Path to the source workbook
    :param lst: sequence of dicts with 'id' (sheet id from openxlsx)
                and 'name' (output file stem) keys
    """
    (xlsxfl.parent / 'csv').mkdir(parents=True, exist_ok=True)
    # Idiom fix: iterate the entries directly instead of range(len(...)).
    for sheet in lst:
        # path for the converted csv file
        shnph = xlsxfl.parent / 'csv' / Path(sheet['name'] + '.csv')
        Xlsx2csv(str(xlsxfl), outputencoding="utf-8").convert(
            str(shnph), sheetid=int(sheet['id']))
def csvfrmxlsx(xlsxfl, df):
    """Write one CSV per sheet described by *df* into <parent>/csv/.

    *df* rows carry 'id' (sheet id from openxlsx) and 'name' (file stem).
    """
    out_dir = xlsxfl.parent / 'csv'
    out_dir.mkdir(parents=True, exist_ok=True)
    for _, entry in df.iterrows():
        target = out_dir / Path(entry['name'] + '.csv')
        converter = Xlsx2csv(str(xlsxfl), outputencoding="utf-8")
        converter.convert(str(target), sheetid=int(entry['id']))
    return
def normalize_tabular_format(project_path):
    """Convert every .xlsx under *project_path* into a sibling .tsv file."""
    options = {
        'delimiter': '\t',
        'skip_empty_lines': True,
        'outputencoding': 'utf-8',
    }
    all_sheets = 0  # sheetid 0 == convert every sheet in the workbook
    for workbook in project_path.rglob('*.xlsx'):
        converter = Xlsx2csv(workbook, **options)
        with open(workbook.with_suffix('.tsv'), 'wt') as out:
            try:
                converter.convert(out, all_sheets)
            except SheetNotFoundException as e:
                log.warning(f'Sheet weirdness in {workbook}\n{e}')
def pd_open_file(path):
    """Convert the workbook at *path* to CSV and load it as a DataFrame.

    Picks a platform-dependent encoding (ANSI on Windows, utf-8 elsewhere)
    and drops rows/columns that are entirely empty.

    NOTE(review): writes the CSV to the module-level `csvpath`; confirm it
    is defined before this is called.
    """
    # pd_data = pd.read_excel(path, engine='openpyxl')
    encoding = 'utf-8'
    if platform.system() == 'Linux':
        encoding = 'utf-8'
        logger.info('Platform is {}'.format(platform.system()))
    elif platform.system() == 'Windows':
        encoding = 'ANSI'
        logger.info('Platform is {}'.format(platform.system()))
    # The '{}'.format(encoding) wrappers were no-ops on an already-string
    # value; pass the encoding directly.
    Xlsx2csv(path, outputencoding=encoding).convert(csvpath)
    pd_data = pd.read_csv(csvpath, encoding=encoding)
    pd_data.dropna(axis="index", how='all', inplace=True)
    pd_data.dropna(axis="columns", how='all', inplace=True)
    return pd_data
def init():
    """Download, extract, and init the project's raw data.

    This function must be called from the root of the project. It will
    download the data, split it into csv files, and then compile it into
    one flat data file.
    """
    click.echo("Starting project initialization ...")
    dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00502/online_retail_II.xlsx"
    # path to the downloaded dataset
    fp = "data/raw/online_retail_II.xlsx"
    # if the data hasn't been downloaded, download it
    if not path.exists(fp):
        click.echo(f"Downloading {path.basename(fp)} to {fp}")
        download(dataset_url, "data/raw")
        click.echo(f"Finished downloading to {fp}")
    xlsx = Xlsx2csv(fp)  # excel to csv object
    if not path.exists(
            "data/raw/txs_2009.csv"):  # create the 2009 txs data csv
        click.echo("Converting 2009 transaction data to csv")
        xlsx.convert("data/raw/txs_2009.csv", sheetid=1)
        # BUG FIX: user-visible message misspelled 'Finsihed'.
        click.echo("Finished converting 2009 transaction data to csv")
    if not path.exists(
            "data/raw/txs_2010.csv"):  # create the 2010 txs data csv
        click.echo("Converting 2010 transaction data to csv")
        xlsx.convert("data/raw/txs_2010.csv", sheetid=2)
        click.echo("Finished converting 2010 transaction data to csv")
    # compile the two separate data sources into one csv file
    click.echo("Starting data compilation")
    tx_2009_df = pd.read_csv("data/raw/txs_2009.csv")
    tx_2010_df = pd.read_csv("data/raw/txs_2010.csv")
    # BUG FIX: DataFrame.append was deprecated and removed in pandas 2.0;
    # pd.concat is the supported equivalent (same default index behavior).
    compiled_df = pd.concat([tx_2009_df, tx_2010_df])
    click.echo("Finished compiling data")
    # save the compiled data
    compiled_df.to_csv("data/raw/data.csv", index=False)
    click.echo("Saved compiled data to data/raw/data.csv")
    # decompress geojson data
    if not path.exists("data/external/countries.geojson"):
        click.echo("Decompressing countries.geojson")
        with gzip.open("data/external/countries.geojson.gz") as f:
            with open("data/external/countries.geojson", "wb") as g:
                g.write(f.read())
def process_roster_file_upload(cls, file, section):
    """Save an uploaded roster workbook, convert it to CSV, build the roster.

    Returns the result of ``cls.create_roster`` for valid uploads; returns
    None (implicitly) when the file is missing or not an allowed type.
    """
    if file and cls._allowed_file(file.filename):
        filename = secure_filename(file.filename)
        uploaded_filename = os.path.join(app.config['UPLOAD_FOLDER'],
                                         filename)
        csv_filename = cls._filename_prefix(
            uploaded_filename) + '.' + 'csv'
        # exist_ok=True replaces the manual errno.EEXIST guard against the
        # create-directory race condition with the stdlib's own handling.
        os.makedirs(os.path.dirname(csv_filename), exist_ok=True)
        file.save(uploaded_filename)
        Xlsx2csv(uploaded_filename,
                 outputencoding="utf-8").convert(csv_filename)
        return cls.create_roster(section, csv_filename)
def convert_to_csv(input_filename):
    """Open an Excel file and transform it into a timestamped CSV.

    Strips single quotes from the first column of every row and writes the
    result to file_YYYY_MM_DD_HH_MM.csv; the intermediate temp.csv is
    removed afterwards.

    :param input_filename: Excel file to convert to CSV
    """
    Xlsx2csv(input_filename, outputencoding="cp1250").convert("temp.csv")
    # BUG FIX: temp.csv is written as cp1250 just above but was re-read as
    # ISO-8859-1, mangling any non-ASCII characters; read it back with the
    # same encoding it was written in.
    with open('temp.csv', 'r', encoding='cp1250') as file_r, open(
            "file_" + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M") +
            ".csv", 'w') as file_w:
        for line in file_r:
            line = line.split(',')
            line[0] = re.sub("'", '', line[0])
            line = ','.join(line)
            file_w.write(line)
    os.remove("temp.csv")
def xlsx(self):
    """Yield rows from ``self.path`` as tab-delimited CSV records.

    sheetid=0 converts every sheet; the first emitted line (the sheet
    header produced by xlsx2csv) is skipped. A missing sheet is logged
    and the generator simply ends.
    """
    kwargs = {
        'delimiter': '\t',
        'skip_empty_lines': True,
        'outputencoding': 'utf-8',
    }
    sheetid = 0
    xlsx2csv = Xlsx2csv(self.path.as_posix(), **kwargs)
    f = io.StringIO()
    try:
        xlsx2csv.convert(f, sheetid)
        f.seek(0)
        gen = csv.reader(f, delimiter='\t')
        # avoid first row sheet line
        next(gen)
        yield from gen
    except SheetNotFoundException as e:
        # BUG FIX: missing space after 'in' in the log message.
        log.warning(f'Sheet weirdness in {self.path}')
        log.warning(str(e))
def xlsx1(self):
    """Yield rows from sheet 1 of ``self.path`` as tab-delimited CSV records.

    An unreadable workbook raises ``exc.NoDataError``; a multi-sheet
    workbook is recorded as a submission error (conversion still runs);
    sheet/converter failures are logged and the generator simply ends.
    """
    kwargs = {
        'delimiter': '\t',
        'skip_empty_lines': True,
        'outputencoding': 'utf-8',
        'hyperlinks': True,
    }
    sheetid = 1
    try:
        xlsx2csv = Xlsx2csv(self.path.as_posix(), **kwargs)
    except InvalidXlsxFileException as e:
        # surface unreadable workbooks as the project's NoDataError
        raise exc.NoDataError(f'{self.path}') from e
    ns = len(xlsx2csv.workbook.sheets)
    if ns > 1:
        message = f'too many sheets ({ns}) in {self.path.as_posix()!r}'
        # NOTE(review): log only when addError reports the error as newly
        # recorded — confirm addError's return semantics.
        if self.addError(exc.EncodingError(message),
                         blame='submission',
                         path=self.path):
            logd.error(message)
    f = io.StringIO()
    try:
        xlsx2csv.convert(f, sheetid)
        f.seek(0)
        gen = csv.reader(f, delimiter='\t')
        yield from gen
    except SheetNotFoundException as e:
        log.warning(f'Sheet weirdness in {self.path}')
        log.warning(str(e))
    except AttributeError as e:
        # xlsx2csv can raise AttributeError on malformed workbooks
        message = ('Major sheet weirdness (maybe try resaving, '
                   'probably a bug in the xlsx2csv converter)? '
                   f'in {self.path}')
        if self.addError(exc.EncodingError(message),
                         blame='submission',
                         path=self.path):
            #log.exception(e)
            logd.critical(message)
import os
from xlsx2csv import Xlsx2csv

INPUT_DIR = r'xlsx_input'
OUTPUT_DIR = r'xlsx_output'

# Batch-convert every workbook in INPUT_DIR to a CSV in OUTPUT_DIR.
for filename in os.listdir(INPUT_DIR):
    # BUG FIX: only accept .xlsx — the old check also let legacy binary
    # .xls files through, which the xlsx2csv package cannot parse.
    if filename.endswith(".xlsx"):
        name, file_extension = os.path.splitext(filename)
        input_file = os.path.join(INPUT_DIR, filename)
        output_file = os.path.join(OUTPUT_DIR, name) + ".csv"
        Xlsx2csv(input_file, outputencoding="utf-8").convert(output_file)
def process_file(service, fieldmap, report_config, file_id, report_time):
    """Download one report file, convert it to CSV, and emit Singer records.

    NOTE(review): `service` looks like a Google API client (files().get_media);
    confirm against the caller. Records are written via `singer.write_record`
    with the fields described by `fieldmap`.
    """
    working_dir = '/tmp/'
    out_file = io.FileIO(os.path.join(working_dir,
                                      report_config['stream_name'] + '.xlsx'),
                         mode='wb')
    # Create a get request.
    request = service.files().get_media(reportId=report_config['report_id'],
                                        fileId=file_id)
    # Create a media downloader instance.
    # Optional: adjust the chunk size used when downloading the file.
    downloader = http.MediaIoBaseDownload(out_file, request,
                                          chunksize=CHUNK_SIZE)
    # Execute the get request and download the file.
    download_finished = False
    while download_finished is False:
        _, download_finished = downloader.next_chunk()
    csv_file = os.path.join(working_dir,
                            report_config['stream_name'] + '.csv')
    Xlsx2csv(os.path.realpath(out_file.name),
             outputencoding="utf-8").convert(csv_file)
    report_id = report_config['report_id']
    stream_name = report_config['stream_name']
    stream_alias = report_config['stream_alias']
    # Mutable state shared with the closure below: where we are relative to
    # the 'Report Fields' header, and how many records were emitted.
    line_state = {'headers_line': False, 'past_headers': False, 'count': 0}
    report_id_int = int(report_id)

    # Classify one CSV line and emit it as a Singer record when it is a
    # data row past the header section.
    def line_transform(line):
        # indented lines are preamble/footer, never data
        if line.startswith((' ', '\t')):
            return
        if not line_state['past_headers'] and not line_state[
                'headers_line'] and 'Report Fields' in line:
            line_state['headers_line'] = True
            return
        if line_state['headers_line']:
            # the line right after 'Report Fields' is the column header row
            line_state['headers_line'] = False
            line_state['past_headers'] = True
            return
        if line_state['past_headers']:
            row = parse_line(line)
            # skip report grant total line
            if row[0] == 'Grand Total:':
                return
            obj = {}
            for i in range(len(fieldmap)):
                field = fieldmap[i]
                obj[field['name']] = transform_field(field['type'], row[i])
            obj[SINGER_REPORT_FIELD] = report_time
            obj[REPORT_ID_FIELD] = report_id_int
            singer.write_record(stream_name, obj, stream_alias=stream_alias)
            line_state['count'] += 1

    with open(csv_file) as f:
        for line in f:
            line_transform(line)
    with singer.metrics.record_counter(stream_name) as counter:
        counter.increment(line_state['count'])
import sys

import requests
import json
import time
import configparser

import pandas as pd
from xlsx2csv import Xlsx2csv

# BUG FIX: sys, pandas and Xlsx2csv were used below without being imported
# in this script's import block.

config = configparser.ConfigParser()
config.read('./config.ini')

print("Assurez-vous d'avoir placer le fichier du jour au format XLSX dans ../data/aides/XLSX")

# First CLI argument: the day to process (xlsx file stem).
daytoprocess = sys.argv[1]

Xlsx2csv("../data/aides/xlsx/"+daytoprocess+".XLSX", outputencoding="utf-8").convert("../data/aides/csv/"+daytoprocess+".csv")
print("CSV généré")

df = pd.read_csv("../data/aides/csv/"+daytoprocess+".csv", dtype={"Période":str,"SIREN":str,"Cde postal": str,"Pays":str,"Montant":float,"Dev.":str,"Date paiement":str})
df = df.dropna(subset=['Date paiement'])
df.fillna({'SIREN':'WALLISETFUTUNA'}, inplace=True)
print("Dataframe aide chargé")
print(str(df.shape[0])+" lignes")

# 'Période' encodes "<month>-<scheme part>"; split it into two columns.
df['mois'] = df['Période'].apply(lambda x: x.split("-")[0])
df['volet'] = df['Période'].apply(lambda x: x.split("-")[1])
df = df.rename(columns={'SIREN': 'siren'})
def convert_to_csv(self):
    """Convert the first sheet of the local xlsx file to a sibling csv."""
    source = self.local_file + ".xlsx"
    destination = self.local_file + ".csv"
    Xlsx2csv(source).convert(destination, sheetid=1)
def get_reqs(xlsx_file):
    """Return a dict mapping PSM req IDs to PSMRequirement instances,
    parsed from the requirements spreadsheet XLSX_FILE.

    Raises ValueError if the file is missing, PSMRequirementFamilyException
    on a duplicate family header, and PSMRequirementException on a
    duplicate requirement ID.
    """
    # This works by converting the requirements spreadsheet to CSV,
    # using xlsx2csv, and then using Python's built-in CSV reader.
    reqs = {}
    families_seen = set()
    if not os.path.exists(xlsx_file):
        raise ValueError(
            "ERROR: can't find requirements spreadsheet {}\n".format(
                xlsx_file))
    csv_fh = StringIO()
    Xlsx2csv(xlsx_file).convert(csv_fh, sheetid=0)
    csv_fh.seek(0)
    csv_reader = csv.reader(csv_fh)
    current_family = None  # two-letter req family code, e.g, "FR", etc
    current_category = None
    for row in csv_reader:
        # Each row is classified by shape/content: family header, category
        # row, requirement row, CSV header row, or unknown.
        if (len(row) == 1 and row[0].startswith("-------- ")):
            # family row
            current_family = family_from_header(row[0])
            if current_family in families_seen:
                raise PSMRequirementFamilyException(
                    "ERROR: encountered family '%s' more than once" %
                    current_family)
            elif current_family is not None:
                families_seen.add(current_family)
        elif (len(row) > 1 and row[0] == "" and row[1] != ""):
            # category row: qualify the category with the active family
            if current_family is not None:
                current_category = current_family + " " + row[1]
            else:
                current_category = None
        elif (len(row) >= 11 and current_family is not None
              and row[0] != "" and _req_id_re.match(row[0]) is not None):
            # req row
            # NOTE(review): this branch already requires
            # current_family is not None, so the warn below is unreachable.
            if current_family is None:
                warn("WARNING: got req \"%s \" while no family active" %
                     row[0])
            if current_category is not None:
                row[1] = current_category
            else:
                warn("WARNING: requirement '%s' has no category" % (row[0]))
            req = PSMRequirement(current_family, *row)
            if req.req_id in reqs:
                # Can't happen, but let's be extra careful.
                raise PSMRequirementException(
                    "ERROR: encountered req '%s' more than once" %
                    req.req_id)
            reqs[req.req_id] = req
        elif (len(row) >= 11 and row[0] == 'Requirement ID Number'
              and row[1] == 'Requirement Category'
              and row[2] == 'Requirement Statement'
              and row[3] == 'Priority'
              and row[4] == 'Rank'
              and row[5] == 'Source'
              and row[6] == 'Source Document'
              and row[7] == 'Release'
              and row[8] == 'Design Reference'
              and row[9] == 'Acceptance Test Reference'
              and row[10] == 'Comment'):
            pass  # skip CSV header rows
        elif current_family is not None:
            warn("WARNING: not really sure what this row is:")
            warn(" %s" % row)
    return reqs
def ConvertToCsvFile(self, filePath, outClientDir, outServerDir,
                     allSheet=False, tmpDir=None, fileFmt="txt"):
    """Convert an xlsx config workbook into client/server table files.

    The workbook is first dumped to a temp tab-delimited file (one sheet,
    or all sheets when allSheet is True), then parsed and re-emitted into
    outClientDir / outServerDir via self._convertToFile. 'Language.xlsx'
    additionally gets split by SplitLanguage on the client side.
    """
    self._filePath = os.path.abspath(filePath)
    baseName = os.path.basename(filePath)
    fileName, ext = os.path.splitext(baseName)
    tempTxt = os.path.abspath("./temp.%s" % (fileFmt))
    if (tmpDir != None):
        tempTxt = os.path.abspath("%s/temp.%s" % (tmpDir, fileFmt))
    if (os.access(tempTxt, os.F_OK) == True):
        os.remove(tempTxt)
    txtFd = codecs.open(tempTxt, 'w', "utf-8")
    xlsx2csv = Xlsx2csv(self._filePath, delimiter='\t', hyperlinks=False,
                        dateformat=None,
                        sheetdelimiter=self.sheetdelimiter,
                        skip_empty_lines=False, escape_strings=False,
                        cmd=False)
    # sheetid 1 = first sheet only; 0 = every sheet in the workbook
    if (allSheet == False):
        xlsx2csv.convert(txtFd, 1)
    else:
        xlsx2csv.convert(txtFd, 0)
    txtFd.close()
    clientOutputFilePath = None
    serverOutputFilePath = None
    txtFd = codecs.open(tempTxt, 'r', "utf-8")
    try:
        with txtFd as csvfile:
            csvReader = csv.reader(csvfile, delimiter='\t')
            # In all-sheet mode every data line is shifted down by one
            # (the sheet header line), hence the +1 offsets.
            if (allSheet == False):
                self._parseFiledBelong(csvReader, Config.belongLine)
                csvfile.seek(0)
                self._parseMembersType(csvReader, Config.typeLine)
            else:
                self._parseFiledBelong(csvReader, Config.belongLine + 1)
                csvfile.seek(0)
                self._parseMembersType(csvReader, Config.typeLine + 1)
            # save client
            if (outClientDir != None):
                csvfile.seek(0)
                outDirC = outClientDir
                if not os.path.exists(outDirC):
                    os.makedirs(outDirC)
                clientOutputFilePath = self._convertToFile(
                    csvReader, os.path.abspath(outDirC), fileName, "C",
                    True, fileFmt)
                baseName = os.path.basename(filePath)
                if baseName == "Language.xlsx":
                    splitLanguage = SplitLanguage(clientOutputFilePath,
                                                  outDirC)
                    splitLanguage.Run()
                    if os.path.exists(clientOutputFilePath):
                        os.remove(clientOutputFilePath)
            # save server
            if (outServerDir != None):
                csvfile.seek(0)
                outDirS = outServerDir
                if not os.path.exists(outDirS):
                    os.makedirs(outDirS)
                serverOutputFilePath = self._convertToFile(
                    csvReader, os.path.abspath(outDirS), fileName, "S",
                    False, fileFmt)
    except Exception:
        # BUG FIX: was Python-2-only `except Exception, e: raise e`, a
        # syntax error under Python 3; a bare re-raise also preserves the
        # original traceback.
        raise
def main(args):
    """Build CineTV SQLite databases and public CSV/DB tarballs.

    *args* is a docopt-style dict: '-d' input export folder (name must
    contain a YYYY-MM-DD date), '-o' output root, '-e' extension CSV
    folder, '-a' auto-generated extension folder, '--reload' to force
    xlsx→csv reconversion. Shell pipeline driven via os.system.
    """
    dateMatch = re.search(r"""[0-9]+-[0-9]+-[0-9]+""", args['-d'])
    if dateMatch is None:
        print(
            "[ERROR] The input CineTV export folder must contain a date in the format: YYYY-MM-DD"
        )
        sys.exit(1)
    date = dateMatch.group(0)
    outputdir = f'{path.join(args["-o"], "cinetv-" + date)}'
    # NOTE(review): rmtree raises if outputdir does not exist yet — the
    # commented-out guard below suggests this was known; confirm intent.
    shutil.rmtree(outputdir)
    # if os.path.exists(outputdir):
    #     for f in glob(f'{outputdir}/*'):
    #         os.remove(f)
    #     os.rmdir(outputdir)
    for f in glob(path.join(args['-d'], '*.xlsx')):
        # NOTE(review): f already carries the '-d' prefix from glob; the
        # extra path.join only works if '-d' is absolute — verify.
        csvFile = path.join(args['-d'], f.replace('.xlsx', '') + '.csv')
        if args['--reload'] or not os.path.exists(csvFile):
            print(f'[INFO] Converting file {f} to {csvFile}')
            Xlsx2csv(f, skip_empty_lines=True,
                     outputencoding='utf-8').convert(csvFile)
    csvFilesPattern = path.join(args['-d'], '*.csv')
    Path(outputdir).mkdir(parents=True, exist_ok=True)
    cinetvDbPath = path.join(outputdir, f'cinetv-{date}.db')
    print(
        f'[INFO] Converting generated CSV files from {csvFilesPattern} to SQLite database at {cinetvDbPath}'
    )
    # if os.system(f'csvs-to-sqlite {csvFilesPattern} {cinetvDbPath}') != 0:
    #     return os.system(f'csvs-to-sqlite {csvFilesPattern} {cinetvDbPath}')
    cinetvExtDbPath = path.join(outputdir, f'cinetv-{date}-ext.db')
    cinetvExtAutoDbPath = path.join(outputdir, f'cinetv-{date}-ext-auto.db')
    # Four cases: both extension sources, only '-e', only '-a', neither.
    if args["-e"] and args["-a"]:
        csvFilesExtPattern = path.join(args['-e'], '*.csv')
        print(
            f'[INFO] Converting CineTV CSV files ({csvFilesPattern}) + CineTV extension CSV files ({csvFilesExtPattern}) to SQLite database at {cinetvExtDbPath}'
        )
        os.system(
            f'csvs-to-sqlite {csvFilesPattern} {csvFilesExtPattern} {cinetvExtDbPath}'
        )
        print(
            f'[INFO] Updating CineTV automatically generated extension data...'
        )
        os.system(
            f'cinetvlinking-exe filmo -d {cinetvExtDbPath} -o {args["-a"]}')
        os.system(
            f'cinetvlinking-exe nom apply -d {cinetvExtDbPath} -o {args["-a"]}'
        )
        csvFilesExtAutoPattern = path.join(args['-a'], '*.csv')
        print(
            f'[INFO] Converting CineTV CSV files ({csvFilesPattern}) + CineTV extension CSV files ({csvFilesExtPattern}) + CineTV automatically generated CSV files {csvFilesExtAutoPattern} to SQLite database at {cinetvExtAutoDbPath}'
        )
        # os.system(f'csvs-to-sqlite --replace-tables {csvFilesPattern} {csvFilesExtPattern} {csvFilesExtAutoPattern} {cinetvExtAutoDbPath}')
        os.system(
            f'csvs-to-sqlite {csvFilesPattern} {csvFilesExtPattern} {cinetvExtAutoDbPath}'
        )
        for f in glob(path.join(args['-a'], '*.csv')):
            tablename = path.splitext(path.basename(f))[0]
            os.system(
                f'csvs-to-sqlite -pk NomID -t {tablename} {f} {cinetvExtAutoDbPath}'
            )
        print(
            f'[INFO] Creating a public subset of CineTV database {cinetvExtAutoDbPath} at {outputdir}'
        )
        os.system(f'cinetv2public-exe -s {cinetvExtAutoDbPath} -d {outputdir}')
    elif args["-e"] and not args["-a"]:
        csvFilesExtPattern = path.join(args['-e'], '*.csv')
        print(
            f'[INFO] Converting CineTV CSV files ({csvFilesPattern}) + CineTV extension CSV files ({csvFilesExtPattern}) to SQLite database at {cinetvExtDbPath}'
        )
        os.system(
            f'csvs-to-sqlite {csvFilesPattern} {csvFilesExtPattern} {cinetvExtDbPath}'
        )
        print(
            f'[INFO] Creating a public subset of CineTV database {cinetvExtDbPath} at {outputdir}'
        )
        os.system(f'cinetv2public-exe -s {cinetvExtDbPath} -d {outputdir}')
    elif not args["-e"] and args["-a"]:
        print(
            f'[INFO] Updating CineTV automatically generated extension data...'
        )
        os.system(f'cinetvlinking-exe filmo -d {cinetvDbPath} -o {args["-a"]}')
        os.system(
            f'cinetvlinking-exe nom apply -d {cinetvDbPath} -o {args["-a"]}')
        csvFilesExtAutoPattern = path.join(args['-a'], '*.csv')
        print(
            f'[INFO] Converting CineTV CSV files ({csvFilesPattern}) + CineTV automatically generated CSV files {csvFilesExtAutoPattern} to SQLite database at {cinetvExtAutoDbPath}'
        )
        os.system(
            f'csvs-to-sqlite {csvFilesPattern} {csvFilesExtAutoPattern} {cinetvExtAutoDbPath}'
        )
        print(
            f'[INFO] Creating a public subset of CineTV database {cinetvExtAutoDbPath} at {outputdir}'
        )
        os.system(f'cinetv2public-exe -s {cinetvExtAutoDbPath} -d {outputdir}')
    else:
        print(
            f'[INFO] Creating a public subset of CineTV database {cinetvDbPath} at {outputdir}'
        )
        os.system(f'cinetv2public-exe -s {cinetvDbPath} -d {outputdir}')
    cinetvPublicDbPath = path.join(outputdir, f'cinetv-{date}-publique.db')
    print("[INFO] Exporting SQLite database to CSV files...")
    cinetvCsvPath = path.join(outputdir, "csv")
    Path(cinetvCsvPath).mkdir(parents=True, exist_ok=True)
    os.system(
        f'sqlite-dump-to-csv --db {cinetvPublicDbPath} --output {cinetvCsvPath}'
    )
    print("[INFO] Generating a tarball from CSV files...")
    cinetvCsvTarPath = path.join(outputdir, f'cinetv-{date}-csv.tar.gz')
    with tarfile.open(cinetvCsvTarPath, "w:gz") as tar:
        for f in glob(f'{cinetvCsvPath}/*'):
            tar.add(f, arcname=f"cinetv-{date}/{path.basename(f)}")
    print("[INFO] Generating a tarball from SQLite DB file...")
    cinetvPublicDbTarPath = path.join(outputdir,
                                      f'cinetv-{date}-sqlite.tar.gz')
    with tarfile.open(cinetvPublicDbTarPath, "w:gz") as tar:
        tar.add(cinetvPublicDbPath, arcname=f"cinetv-{date}/cinetv-{date}.db")
parser = argparse.ArgumentParser() parser.add_argument('-i', '--input', metavar='FILENAME', help='name of input file (REQUIRED)', required=True) parser.add_argument('-k', '--knp', metavar='FILENAME', help='The list of knp terms (REQUIRED)', required=True) args = parser.parse_args() #convert the input xlsx file to csv try: Xlsx2csv(args.input, outputencoding="utf-8").convert("temp.csv") except FileNotFoundError: sys.exit('Unable to open {}. Does the file exist?'.format(args.input)) except PermissionError: sys.exit('Unable to open {}. Check your file permissions.'.format( args.input)) except: sys.exit('Something broke trying to convert the excel file into csv.') #dump the knp terms into a list try: knp_terms = list() with open(args.knp, 'r') as knp_list: content = knp_list.readlines() for line in content: knp_terms.append(line)
from xlsx2csv import Xlsx2csv

# NOTE(review): presumably an annual minimum-wage salary threshold used
# further down this script — confirm.
MIN_SALARY = 15080

# CLI args: dataset name and the workbook URL to fetch.
name = sys.argv[1]
url = sys.argv[2]

# download data
print("Downloading data for " + name)
xlsx_path = wget.download(url, out="data/{}.xlsx".format(name))

# convert to csv
print()
print("Converting data/{}.xlsx to data/{}.csv".format(name, name))
csv_path = "data/{}.csv".format(name)
Xlsx2csv(xlsx_path, outputencoding="utf-8").convert(csv_path)

# save to db
print("Saving {} to database".format(name))
print("Reading file: " + csv_path)
sheet = pd.read_csv(csv_path)
print("Renaming columns...")
# normalize column names before persisting
sheet.rename(
    columns={
        "PERIOD_OF_EMPLOYMENT_START_DATE": "EMPLOYMENT_START_DATE",
        "PREVAILING_WAGE_1": "PREVAILING_WAGE",
    },
    inplace=True,
)

# column list continues beyond this chunk
PERSON_FIELDS = [
import pandas as pd
from xlsx2csv import Xlsx2csv

# Create empty table
# Windows-specific absolute path to the thesis data folder.
path = r'C:/Users/H395978/AppData/Local/Programs/Thesis/stamm/H164632.GLOBAL/'
#output_table.to_csv(path+'export_router_0001.csv',encoding='utf-8-sig',sep=',',index=False)
# Convert the exported assembly workbook into a local CSV file.
Xlsx2csv(path + 'export_assy_0001.xlsx',
         outputencoding="utf-8").convert("assy.csv")
import pandas as pd
from xlsx2csv import Xlsx2csv
import sys

# First CLI argument: the day to process (xlsx file stem).
daytoprocess = sys.argv[1]

print("Convert XLSX to CSV")
Xlsx2csv("../data/reports/xlsx/" + daytoprocess + ".xlsx",
         outputencoding="utf-8").convert("../data/reports/csv/" +
                                         daytoprocess + ".csv")

print("Load csv")
# header=3: the real column row sits on the 4th line of the export.
df = pd.read_csv("../data/reports/csv/" + daytoprocess + ".csv", header=3)

print("Drop useless column")
# BUG FIX: `columns` was passed a set literal; pandas documents a
# label-or-list here — use a list for the supported, deterministic form.
df = df.drop(columns=['Libellé A21'])

# French department codes (including Corsica 2A/2B and overseas),
# plus the ND / total buckets present in the export.
dep = [
    '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12',
    '13', '14', '15', '16', '17', '18', '19', '21', '22', '23', '24', '25',
    '26', '27', '28', '29', '2A', '2B', '30', '31', '32', '33', '34', '35',
    '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47',
    '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59',
    '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71',
    '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83',
    '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95',
    '971', '972', '973', '974', '976', 'ND', 'total'
]
# NOTE(review): this fragment starts mid-branch — the preceding
# `if module == ...` header (where csv_path/xlsx_path/sep/with_header are
# read) lies outside this chunk.
if not csv_path or not xlsx_path:
    raise Exception("Falta una ruta")
f_ = open(csv_path, 'r', encoding='latin-1')
df = pd.read_csv(f_, sep=sep)
df.to_excel(xlsx_path, index=None, header=with_header)
f_.close()

if module == "xlsxToCsv":
    # Convert an xlsx to csv with a caller-supplied delimiter (',' default).
    csv_path = GetParams("csv_path")
    xlsx_path = GetParams("xlsx_path")
    delimiter = GetParams("delimiter")
    try:
        if not delimiter:
            delimiter = ","
        Xlsx2csv(xlsx_path, outputencoding="utf-8",
                 delimiter=delimiter).convert(csv_path)
    except Exception as e:
        PrintException()
        raise e

if module == "countColumns":
    excel = GetGlobals("excel")
    sheet = GetParams("sheet")
    result = GetParams("var_")
    # The matching except clause continues beyond this chunk.
    try:
        excel_path = excel.file_["default"]["path"]
        print(excel_path)
        # NOTE(review): `sheetname` was removed from pandas.read_excel in
        # 0.23+ (now `sheet_name`) — confirm the pinned pandas version.
        df = pd.read_excel(excel_path, sheetname=sheet)