Beispiel #1
0
def getsheets(filepath):
    """Convert *filepath* to CSV and print the resulting data rows.

    :param filepath: path to the source .xlsx workbook
    """
    # !pip install xlsx2csv
    Xlsx2csv(filepath, outputencoding="utf-8").convert("Orders.csv")
    # NOTE(review): without a sheetid this converts the same default sheet
    # twice — confirm Orders_rank.csv was meant to be a different sheet.
    Xlsx2csv(filepath, outputencoding="utf-8").convert("Orders_rank.csv")
    data = 'Orders.csv' # Checking the CSV content
    csv_data = pd.read_csv(data, delimiter = ',')
    # bug fix: iterating a DataFrame directly yields only the column names;
    # itertuples() walks the actual data rows.
    for row in csv_data.itertuples(index=False):
        print(row)
Beispiel #2
0
def xlsx2csv(filename_xlsx: str, filename_csv: str):
    """Convert an xlsx workbook, given as a local path or http(s) URL, to CSV.

    Remote workbooks are fetched into a temporary file before conversion.
    """
    is_remote = filename_xlsx.startswith(("https://", "http://"))
    if is_remote:
        with tempfile.NamedTemporaryFile() as tmp:
            download_file_from_url(filename_xlsx, tmp.name)
            Xlsx2csv(tmp.name, outputencoding="utf-8").convert(filename_csv)
        return
    Xlsx2csv(filename_xlsx, outputencoding="utf-8").convert(filename_csv)
Beispiel #3
0
    def xlsx(self):
        """Yield rows of the first sheet of ``self.path`` as tab-separated values.

        Records a submission error when the workbook contains more than one
        sheet; a missing sheet is logged and produces no rows.
        """
        kwargs = {
            'delimiter' : '\t',
            'skip_empty_lines' : True,
            'outputencoding': 'utf-8',
            'hyperlinks': True,
        }
        sheetid = 1  # only the first sheet is read
        xlsx2csv = Xlsx2csv(self.path.as_posix(), **kwargs)
        ns = len(xlsx2csv.workbook.sheets)
        if ns > 1:
            message = f'too many sheets ({ns}) in {self.path.as_posix()!r}'
            self.addError(exc.EncodingError(message),
                          blame='submission',
                          path=self.path)
            logd.error(message)

        f = io.StringIO()
        try:
            xlsx2csv.convert(f, sheetid)
            f.seek(0)
            gen = csv.reader(f, delimiter='\t')
            yield from gen
        except SheetNotFoundException as e:
            # bug fix: the log message was missing the space before the path
            log.warning(f'Sheet weirdness in {self.path}')
            log.warning(str(e))
Beispiel #4
0
    def ex_csv():
        """Convert every *.xlsx in the working directory to CSV with a progress bar.

        Relies on the enclosing scope for ``filelist``, ``formulario``,
        ``count``, ``path`` and ``refresh``.
        """
        for files in glob.glob('*.xlsx'):
            filelist.append(files)

        progbar = ttk.Progressbar(formulario,
                                  variable=count,
                                  maximum=len(filelist))
        progbar.place(x=130, y=280)

        lista = pd.DataFrame(filelist)
        lista.to_csv(path + 'lista archivos.txt', header=False)

        for counter, fileitem in enumerate(filelist):
            # renamed from ``csv`` to avoid shadowing the csv module
            csv_name = fileitem.replace('.xlsx', '.csv')

            try:
                Xlsx2csv(path + fileitem,
                         outputencoding='utf-8').convert(path + csv_name)
            except Exception:
                # best effort: one broken workbook must not abort the batch,
                # but a bare except would also swallow KeyboardInterrupt
                pass

            count.set(counter + 1)
            formulario.update_idletasks()
            refresh()
Beispiel #5
0
def meta_extract(file):
    """Extract the "Data" sheets of datasets/<file> into META_FILES1/Data/."""
    ticker = fileTicker(file)
    start = process_time()
    converter = Xlsx2csv(
        "datasets/" + str(file),
        outputencoding="utf-8",
        include_sheet_pattern="^Data",
    )
    converter.convert("META_FILES1/Data/" + str(ticker), sheetid=0)
    elapsed = process_time() - start
    print("Done Meta Extraction !!")
    print("Time take by " + str(file) + " is : ", elapsed, "sec")
    print("******************************************************************************")
Beispiel #6
0
def meta_extract(file):
    """Extract the model sheets of datasets/<file> into META_FILES1/<ticker>.

    All known sheet-name spellings are tried ("Empirical", the misspelled
    "Emperical", and "Regression").
    """
    ticker = fileTicker(file)
    # the three original calls were identical except for the sheet pattern
    for pattern in ("^Empirical Model", "^Emperical Model", "^Regression Model"):
        Xlsx2csv(
            "datasets/" + str(file),
            outputencoding="utf-8",
            include_sheet_pattern=pattern,
        ).convert("META_FILES1/" + str(ticker), sheetid=0)
Beispiel #7
0
def __convert_xlsx2csv__(root_dir):
    """Recursively convert every file under *root_dir* to CSV.

    Output files go into a 'converted' subdirectory next to each source.
    """
    for root, dirs, files in os.walk(root_dir):
        for name in files:
            fullname = os.path.join(root, name)
            if os.path.isfile(fullname):
                # bug fix: escape the dot and anchor the pattern so 'xlsx'
                # is only replaced when it is the file extension
                outname = os.path.join(root, 'converted', re.sub(r'\.xlsx$', '.csv', name))
                # Encoding and delimiter are set here; sheetid=0 means all
                # sheets of the workbook are converted, otherwise pass the
                # number of the wanted sheet.
                Xlsx2csv(fullname, outputencoding="utf-8", delimiter='#').convert(outname, sheetid=0)
Beispiel #8
0
def process_roster_file_upload(file, section):
    # Validate the uploaded workbook, save it, convert it to CSV, then load
    # the roster rows into the database for the given section.
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        uploaded_filename = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        # same base name as the upload, with a .csv extension
        csv_filename = filename_prefix(uploaded_filename) + '.' + 'csv'
        file.save(uploaded_filename)
        Xlsx2csv(uploaded_filename, outputencoding="utf-8").convert(csv_filename)
        rdb = RosterToDb(section.id, csv_filename)
        roster = rdb.roster  # NOTE(review): assigned but never used or returned — confirm intent
Beispiel #9
0
 def xlsx_to_csv(self, file_names):
     """Convert an uploaded xlsx file to CSV and return the csv file name.

     :param file_names: xlsx file name inside ``upload_folder``
     :return: the corresponding ``<name>.csv`` file name
     """
     # bug fix: split('.')[0] truncated names containing extra dots;
     # splitext strips only the final extension
     file_name = os.path.splitext(file_names)[0]
     xlsx_path = os.path.join(upload_folder, file_names)
     csv_path = os.path.join(upload_folder, file_name + '.csv')
     Xlsx2csv(xlsx_path, outputencoding="utf-8").convert(csv_path)
     return file_name + '.csv'
Beispiel #10
0
def csvfmxlsx(
        xlsxfl,
        lst):  # create csv files in csv folder on parent directory from list
    """Write one CSV per sheet described in *lst* next to the workbook.

    :param xlsxfl: pathlib.Path of the source workbook
    :param lst: sequence of dicts with 'id' (sheet id) and 'name' keys
    """
    (xlsxfl.parent / 'csv').mkdir(parents=True, exist_ok=True)
    # idiom fix: iterate the entries directly instead of range(len(...))
    for entry in lst:  # table row iteration by audit2 column type
        shnph = xlsxfl.parent / 'csv' / Path(
            entry['name'] + '.csv')  # path for converted csv file
        Xlsx2csv(str(xlsxfl), outputencoding="utf-8").convert(
            str(shnph), sheetid=int(entry['id']))  # id from openxlsx
    return
Beispiel #11
0
def csvfrmxlsx(xlsxfl,
               df):  # create csv files in csv folder on parent directory
    """Write one CSV per sheet described by DataFrame *df* next to the workbook.

    :param xlsxfl: pathlib.Path of the source workbook
    :param df: DataFrame with 'id' (sheet id) and 'name' columns
    """
    (xlsxfl.parent / 'csv').mkdir(parents=True, exist_ok=True)
    for _, row in df.iterrows():  # positional index is unused
        shnum = row['id']
        shnph = xlsxfl.parent / 'csv' / Path(
            row['name'] + '.csv')  # path for converted csv file
        Xlsx2csv(str(xlsxfl), outputencoding="utf-8").convert(
            str(shnph), sheetid=int(shnum))  # id from openxlsx
    return
Beispiel #12
0
def normalize_tabular_format(project_path):
    """Convert every *.xlsx under *project_path* to a sibling .tsv file."""
    kwargs = {
        'delimiter': '\t',
        'skip_empty_lines': True,
        'outputencoding': 'utf-8',
    }
    sheetid = 0  # 0 = convert all sheets
    for xf in project_path.rglob('*.xlsx'):
        xlsx2csv = Xlsx2csv(xf, **kwargs)
        # bug fix: pin the output file's encoding to utf-8 so the declared
        # outputencoding is honoured regardless of the platform default
        with open(xf.with_suffix('.tsv'), 'wt', encoding='utf-8') as f:
            try:
                xlsx2csv.convert(f, sheetid)
            except SheetNotFoundException as e:
                log.warning(f'Sheet weirdness in {xf}\n{e}')
Beispiel #13
0
def pd_open_file(path):
    """Read an Excel file into a DataFrame via a CSV round-trip.

    Picks a platform-appropriate encoding ('ANSI' on Windows, utf-8
    otherwise), converts the workbook to CSV, loads it, and drops rows and
    columns that are entirely empty.
    """
    # pd_data = pd.read_excel(path, engine='openpyxl')
    encoding = 'utf-8'
    if platform.system() == 'Linux':
        encoding = 'utf-8'
        logger.info('Platform is {}'.format(platform.system()))
    elif platform.system() == 'Windows':
        encoding = 'ANSI'
        logger.info('Platform is {}'.format(platform.system()))
    # NOTE(review): ``csvpath`` must come from module scope — confirm it is
    # defined. The '{}'.format(encoding) wrappers were redundant: encoding
    # is already a str.
    Xlsx2csv(path, outputencoding=encoding).convert(csvpath)
    pd_data = pd.read_csv(csvpath, encoding=encoding)
    pd_data.dropna(axis="index", how='all', inplace=True)
    pd_data.dropna(axis="columns", how='all', inplace=True)
    return pd_data
Beispiel #14
0
def init():
    """Download, extract, and init the project's raw data.

    This function must be called from the root of the project.
    It will download the data, split it into csv files, and then compile
    it into one flat data file.
    """
    click.echo("Starting project initialization ...")
    dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00502/online_retail_II.xlsx"

    # path to the downloaded dataset
    fp = "data/raw/online_retail_II.xlsx"

    # if the data hasn't been downloaded, download it
    if not path.exists(fp):
        click.echo(f"Downloading {path.basename(fp)} to {fp}")
        download(dataset_url, "data/raw")
        click.echo(f"Finished downloading to {fp}")
    # bug fix: the converter was previously created only inside the download
    # branch, raising NameError whenever the xlsx already existed but a csv
    # still had to be generated
    xlsx = Xlsx2csv(fp)  # excel to csv object
    if not path.exists(
            "data/raw/txs_2009.csv"):  # create the 2009 txs data csv
        click.echo("Converting 2009 transaction data to csv")
        xlsx.convert("data/raw/txs_2009.csv", sheetid=1)
        click.echo("Finished converting 2009 transaction data to csv")
    if not path.exists(
            "data/raw/txs_2010.csv"):  # create the 2010 txs data csv
        click.echo("Converting 2010 transaction data to csv")
        xlsx.convert("data/raw/txs_2010.csv", sheetid=2)
        click.echo("Finished converting 2010 transaction data to csv")

    # compile the two separate data sources into one csv file
    click.echo("Starting data compilation")
    tx_2009_df = pd.read_csv("data/raw/txs_2009.csv")
    tx_2010_df = pd.read_csv("data/raw/txs_2010.csv")
    # DataFrame.append is deprecated (removed in pandas 2.0); concat is the
    # supported equivalent
    compiled_df = pd.concat([tx_2009_df, tx_2010_df])
    click.echo("Finished compiling data")

    # save the compiled data
    compiled_df.to_csv("data/raw/data.csv", index=False)
    click.echo("Saved compiled data to data/raw/data.csv")

    # decompress geojson data
    if not path.exists("data/external/countries.geojson"):
        click.echo("Decompressing countries.geojson")
        with gzip.open("data/external/countries.geojson.gz") as f:
            with open("data/external/countries.geojson", "wb") as g:
                g.write(f.read())
Beispiel #15
0
 def process_roster_file_upload(cls, file, section):
     """Validate, save and convert an uploaded roster, then create the roster.

     Returns the result of ``cls.create_roster`` for a valid upload, or
     None when the file is missing or has a disallowed extension.
     """
     if file and cls._allowed_file(file.filename):
         filename = secure_filename(file.filename)
         uploaded_filename = os.path.join(app.config['UPLOAD_FOLDER'],
                                          filename)
         csv_filename = cls._filename_prefix(
             uploaded_filename) + '.' + 'csv'
         # exist_ok=True makes the manual exists()/EEXIST race guard
         # unnecessary: concurrent creation is handled atomically
         os.makedirs(os.path.dirname(csv_filename), exist_ok=True)
         file.save(uploaded_filename)
         Xlsx2csv(uploaded_filename,
                  outputencoding="utf-8").convert(csv_filename)
         return cls.create_roster(section, csv_filename)
def convert_to_csv(input_filename):
    """
    Open Excel file and transforms it to CSV

    :param input_filename: Excel file to convert to CSV
    """
    Xlsx2csv(input_filename, outputencoding="cp1250").convert("temp.csv")

    # Strip single quotes from the first comma-separated field of every line
    # and write the result to a timestamp-named csv file.
    # NOTE(review): the temp file is written as cp1250 but read back as
    # ISO-8859-1, and the naive split(',') ignores quoted fields containing
    # commas — confirm both are intended.
    with open('temp.csv', 'r', encoding='ISO-8859-1') as file_r, open(
            "file_" + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M") +
            ".csv", 'w') as file_w:
        for line in file_r:
            line = line.split(',')
            line[0] = re.sub("'", '', line[0])
            line = ','.join(line)
            file_w.write(line)

    # temp.csv is only an intermediate artifact
    os.remove("temp.csv")
Beispiel #17
0
    def xlsx(self):
        """Yield data rows of ``self.path`` as tab-separated values.

        The first emitted row (sheet header line) is skipped; a missing
        sheet is logged and produces no rows.
        """
        kwargs = {
            'delimiter' : '\t',
            'skip_empty_lines' : True,
            'outputencoding': 'utf-8',
        }
        sheetid = 0  # 0 = convert all sheets
        xlsx2csv = Xlsx2csv(self.path.as_posix(), **kwargs)

        f = io.StringIO()
        try:
            xlsx2csv.convert(f, sheetid)
            f.seek(0)
            gen = csv.reader(f, delimiter='\t')
            # avoid first row sheet line
            next(gen)
            yield from gen
        except SheetNotFoundException as e:
            # bug fix: restored the missing space before the path
            log.warning(f'Sheet weirdness in {self.path}')
            log.warning(str(e))
Beispiel #18
0
    def xlsx1(self):
        """Yield rows of the first sheet of ``self.path`` as tab-separated values.

        Raises ``exc.NoDataError`` when the workbook cannot be opened.
        Records a submission error when more than one sheet is present;
        sheet or parser failures are logged and simply produce no rows.
        """
        kwargs = {
            'delimiter': '\t',
            'skip_empty_lines': True,
            'outputencoding': 'utf-8',
            'hyperlinks': True,
        }
        sheetid = 1  # only the first sheet is read
        try:
            xlsx2csv = Xlsx2csv(self.path.as_posix(), **kwargs)
        except InvalidXlsxFileException as e:
            raise exc.NoDataError(f'{self.path}') from e

        ns = len(xlsx2csv.workbook.sheets)
        if ns > 1:
            message = f'too many sheets ({ns}) in {self.path.as_posix()!r}'
            # addError returns truthy only the first time this error is
            # recorded, so the log line is not duplicated
            if self.addError(exc.EncodingError(message),
                             blame='submission',
                             path=self.path):
                logd.error(message)

        f = io.StringIO()
        try:
            xlsx2csv.convert(f, sheetid)
            f.seek(0)
            gen = csv.reader(f, delimiter='\t')
            yield from gen
        except SheetNotFoundException as e:
            log.warning(f'Sheet weirdness in {self.path}')
            log.warning(str(e))
        except AttributeError as e:
            # the converter can raise AttributeError on malformed workbooks
            message = ('Major sheet weirdness (maybe try resaving, '
                       'probably a bug in the xlsx2csv converter)? '
                       f'in {self.path}')
            if self.addError(exc.EncodingError(message),
                             blame='submission',
                             path=self.path):
                #log.exception(e)
                logd.critical(message)
Beispiel #19
0
import os

from xlsx2csv import Xlsx2csv

INPUT_DIR = r'xlsx_input'
OUTPUT_DIR = r'xlsx_output'

# Make sure the destination directory exists before writing into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for filename in os.listdir(INPUT_DIR):
    # bug fix: Xlsx2csv cannot parse legacy binary .xls workbooks (it would
    # raise on them), so only .xlsx files are converted here.
    if filename.endswith(".xlsx"):
        name, file_extension = os.path.splitext(filename)
        input_file = os.path.join(INPUT_DIR, filename)
        output_file = os.path.join(OUTPUT_DIR, name) + ".csv"
        Xlsx2csv(input_file, outputencoding="utf-8").convert(output_file)
Beispiel #20
0
def process_file(service, fieldmap, report_config, file_id, report_time):
    """Download a report file, convert it to CSV and emit singer records.

    service: API client used to fetch the report media.
    fieldmap: ordered field descriptors ({'name', 'type'}) matching the
        report's data columns by position.
    report_config: dict with 'report_id', 'stream_name' and 'stream_alias'.
    file_id: id of the report file to download.
    report_time: timestamp stamped onto every emitted record.
    """
    working_dir = '/tmp/'
    out_file = io.FileIO(os.path.join(working_dir,
                                      report_config['stream_name'] + '.xlsx'),
                         mode='wb')

    # Create a get request.
    request = service.files().get_media(reportId=report_config['report_id'],
                                        fileId=file_id)

    # Create a media downloader instance.
    # Optional: adjust the chunk size used when downloading the file.
    downloader = http.MediaIoBaseDownload(out_file,
                                          request,
                                          chunksize=CHUNK_SIZE)

    # Execute the get request and download the file.
    download_finished = False
    while download_finished is False:
        _, download_finished = downloader.next_chunk()

    csv_file = os.path.join(working_dir, report_config['stream_name'] + '.csv')

    Xlsx2csv(os.path.realpath(out_file.name),
             outputencoding="utf-8").convert(csv_file)

    report_id = report_config['report_id']
    stream_name = report_config['stream_name']
    stream_alias = report_config['stream_alias']

    # Mutable parser state shared with the line_transform closure: whether
    # the 'Report Fields' marker was seen, whether the header row has been
    # consumed, and how many records have been emitted.
    line_state = {'headers_line': False, 'past_headers': False, 'count': 0}

    report_id_int = int(report_id)

    # Per-line state machine: skip preamble until 'Report Fields', consume
    # the header row, then emit one record per data row.
    def line_transform(line):
        # indented continuation lines are ignored entirely
        if line.startswith((' ', '\t')):
            return
        if not line_state['past_headers'] and not line_state[
                'headers_line'] and 'Report Fields' in line:
            line_state['headers_line'] = True
            return
        # the row right after the marker is the header row itself
        if line_state['headers_line']:
            line_state['headers_line'] = False
            line_state['past_headers'] = True
            return

        if line_state['past_headers']:
            row = parse_line(line)
            # skip report grant total line
            if row[0] == 'Grand Total:':
                return

            obj = {}
            for i in range(len(fieldmap)):
                field = fieldmap[i]
                obj[field['name']] = transform_field(field['type'], row[i])

            obj[SINGER_REPORT_FIELD] = report_time
            obj[REPORT_ID_FIELD] = report_id_int

            singer.write_record(stream_name, obj, stream_alias=stream_alias)
            line_state['count'] += 1

    with open(csv_file) as f:
        for line in f:
            line_transform(line)

    with singer.metrics.record_counter(stream_name) as counter:
        counter.increment(line_state['count'])
import requests
import json
import time
import configparser


config = configparser.ConfigParser()
config.read('./config.ini')


print("Assurez-vous d'avoir placer le fichier du jour au format XLSX dans ../data/aides/XLSX")

# Day to process, taken from the command line.
# NOTE(review): ``sys``, ``pd`` and ``Xlsx2csv`` are not in the visible
# import block — confirm they are imported elsewhere in this file.
daytoprocess = sys.argv[1]


# Convert the day's XLSX export to CSV.
Xlsx2csv("../data/aides/xlsx/"+daytoprocess+".XLSX", outputencoding="utf-8").convert("../data/aides/csv/"+daytoprocess+".csv")

print("CSV généré")

# Load the generated csv, keeping identifier-like columns as strings.
df = pd.read_csv("../data/aides/csv/"+daytoprocess+".csv", dtype={"Période":str,"SIREN":str,"Cde postal": str,"Pays":str,"Montant":float,"Dev.":str,"Date paiement":str})
df = df.dropna(subset=['Date paiement'])
# Rows without a SIREN are tagged with a sentinel value.
df.fillna({'SIREN':'WALLISETFUTUNA'}, inplace=True)

print("Dataframe aide chargé")

print(str(df.shape[0])+" lignes")

# "Période" holds "<mois>-<volet>"; split it into two columns.
df['mois'] = df['Période'].apply(lambda x: x.split("-")[0])
df['volet'] = df['Période'].apply(lambda x: x.split("-")[1])
df = df.rename(columns={'SIREN': 'siren'})
Beispiel #22
0
    def convert_to_csv(self):
        """Convert sheet 1 of the local xlsx file to a csv beside it."""
        source = self.local_file + ".xlsx"
        target = self.local_file + ".csv"
        converter = Xlsx2csv(source)
        converter.convert(target, sheetid=1)
def get_reqs(xlsx_file):
    """Return a dictionary of PSMRequirements based on XSLX_FILE.
    Return a dict mapping PSM req IDs to PSMRequirement instances."""
    # This works by converting the requirements spreadsheet to CSV,
    # using xlsx2csv, and then using Python's built-in CSV reader.
    reqs = {}
    families_seen = set()

    if not os.path.exists(xlsx_file):
        raise ValueError(
            "ERROR: can't find requirements spreadsheet {}\n".format(
                xlsx_file))

    csv_fh = StringIO()
    Xlsx2csv(xlsx_file).convert(csv_fh, sheetid=0)
    csv_fh.seek(0)

    csv_reader = csv.reader(csv_fh)
    current_family = None  # two-letter req family code, e.g, "FR", etc
    current_category = None
    for row in csv_reader:
        if (len(row) == 1 and row[0].startswith("-------- ")):  # family row
            current_family = family_from_header(row[0])
            if current_family in families_seen:
                raise PSMRequirementFamilyException(
                    "ERROR: encountered family '%s' more than once" %
                    current_family)
            elif current_family is not None:
                families_seen.add(current_family)
        elif (len(row) > 1 and row[0] == "" and row[1] != ""):  # category row
            if current_family is not None:
                current_category = current_family + " " + row[1]
            else:
                current_category = None
        elif (len(row) >= 11 and current_family is not None and row[0] != ""
              and _req_id_re.match(row[0]) is not None):  # req row
            # bug fix: removed an unreachable "no family active" warning —
            # this branch's condition already requires current_family to be
            # non-None, so that check could never fire
            if current_category is not None:
                row[1] = current_category
            else:
                warn("WARNING: requirement '%s' has no category" % (row[0]))
            req = PSMRequirement(current_family, *row)
            if req.req_id in reqs:
                # Can't happen, but let's be extra careful.
                raise PSMRequirementException(
                    "ERROR: encountered req '%s' more than once" % req.req_id)
            reqs[req.req_id] = req
        elif (len(row) >= 11 and row[0] == 'Requirement ID Number'
              and row[1] == 'Requirement Category'
              and row[2] == 'Requirement Statement' and row[3] == 'Priority'
              and row[4] == 'Rank' and row[5] == 'Source'
              and row[6] == 'Source Document' and row[7] == 'Release'
              and row[8] == 'Design Reference'
              and row[9] == 'Acceptance Test Reference'
              and row[10] == 'Comment'):
            pass  # skip CSV header rows
        elif current_family is not None:
            warn("WARNING: not really sure what this row is:")
            warn("         %s" % row)
    return reqs
Beispiel #24
0
    def ConvertToCsvFile(self,
                         filePath,
                         outClientDir,
                         outServerDir,
                         allSheet=False,
                         tmpDir=None,
                         fileFmt="txt"):
        """Convert an xlsx workbook into client/server delimited text files.

        filePath: source workbook path.
        outClientDir / outServerDir: output directories (None skips that side).
        allSheet: False converts only sheet 1; True converts every sheet,
            which shifts the header lines down by one.
        tmpDir: directory for the intermediate temp file (cwd when None).
        fileFmt: extension used for the temp and output files.

        NOTE(review): this is Python 2 code (``except Exception, e`` syntax).
        """
        self._filePath = os.path.abspath(filePath)
        baseName = os.path.basename(filePath)
        fileName, ext = os.path.splitext(baseName)
        # intermediate dump of the workbook as tab-separated utf-8 text
        tempTxt = os.path.abspath("./temp.%s" % (fileFmt))
        if (tmpDir != None):
            tempTxt = os.path.abspath("%s/temp.%s" % (tmpDir, fileFmt))

        if (os.access(tempTxt, os.F_OK) == True):
            os.remove(tempTxt)
        txtFd = codecs.open(tempTxt, 'w', "utf-8")
        xlsx2csv = Xlsx2csv(self._filePath,
                            delimiter='\t',
                            hyperlinks=False,
                            dateformat=None,
                            sheetdelimiter=self.sheetdelimiter,
                            skip_empty_lines=False,
                            escape_strings=False,
                            cmd=False)
        # sheetid 1 = first sheet only; 0 = every sheet in the workbook
        if (allSheet == False):
            xlsx2csv.convert(txtFd, 1)
        else:
            xlsx2csv.convert(txtFd, 0)
        txtFd.close()
        clientOutputFilePath = None
        serverOutputFilePath = None
        txtFd = codecs.open(tempTxt, 'r', "utf-8")
        try:
            with txtFd as csvfile:
                csvReader = csv.reader(csvfile, delimiter='\t')
                # with all sheets converted an extra sheet-name line shifts
                # the belong/type header rows down by one
                if (allSheet == False):
                    self._parseFiledBelong(csvReader, Config.belongLine)
                    csvfile.seek(0)
                    self._parseMembersType(csvReader, Config.typeLine)
                else:
                    self._parseFiledBelong(csvReader, Config.belongLine + 1)
                    csvfile.seek(0)
                    self._parseMembersType(csvReader, Config.typeLine + 1)

                # save client
                if (outClientDir != None):
                    csvfile.seek(0)
                    outDirC = outClientDir
                    if not os.path.exists(outDirC):
                        os.makedirs(outDirC)
                    clientOutputFilePath = self._convertToFile(
                        csvReader, os.path.abspath(outDirC), fileName, "C",
                        True, fileFmt)
                    baseName = os.path.basename(filePath)
                    # Language tables are additionally split per language
                    if baseName == "Language.xlsx":
                        splitLanguage = SplitLanguage(clientOutputFilePath,
                                                      outDirC)
                        splitLanguage.Run()
                        if os.path.exists(clientOutputFilePath):
                            os.remove(clientOutputFilePath)
                # save server
                if (outServerDir != None):
                    csvfile.seek(0)
                    outDirS = outServerDir
                    if not os.path.exists(outDirS):
                        os.makedirs(outDirS)
                    serverOutputFilePath = self._convertToFile(
                        csvReader, os.path.abspath(outDirS), fileName, "S",
                        False, fileFmt)
        except Exception, e:
            raise e
def main(args):
    """Build SQLite databases and tarball exports from a CineTV xlsx dump.

    args: docopt-style dict — '-d' input dir (name must contain a
    YYYY-MM-DD date), '-o' output dir, optional '-e' (extension csv dir),
    '-a' (auto-generated extension dir) and '--reload' flag.

    NOTE(review): shells out via os.system with interpolated paths; paths
    containing spaces or shell metacharacters will break — confirm inputs
    are trusted.
    """
    dateMatch = re.search(r"""[0-9]+-[0-9]+-[0-9]+""", args['-d'])

    if dateMatch is None:
        print(
            "[ERROR] The input CineTV export folder must contain a date in the format: YYYY-MM-DD"
        )
        sys.exit(1)

    date = dateMatch.group(0)

    outputdir = f'{path.join(args["-o"], "cinetv-" + date)}'

    # NOTE(review): rmtree raises if outputdir does not already exist —
    # confirm the directory is always present on entry.
    shutil.rmtree(outputdir)
    # if os.path.exists(outputdir):
    #     for f in glob(f'{outputdir}/*'):
    #         os.remove(f)
    #     os.rmdir(outputdir)

    # Convert each xlsx export to csv unless a csv already exists
    # (or --reload forces regeneration).
    for f in glob(path.join(args['-d'], '*.xlsx')):
        csvFile = path.join(args['-d'], f.replace('.xlsx', '') + '.csv')
        if args['--reload'] or not os.path.exists(csvFile):
            print(f'[INFO] Converting file {f} to {csvFile}')
            Xlsx2csv(f, skip_empty_lines=True,
                     outputencoding='utf-8').convert(csvFile)

    csvFilesPattern = path.join(args['-d'], '*.csv')

    Path(outputdir).mkdir(parents=True, exist_ok=True)
    cinetvDbPath = path.join(outputdir, f'cinetv-{date}.db')
    print(
        f'[INFO] Converting generated CSV files from {csvFilesPattern} to SQLite database at {cinetvDbPath}'
    )
    # if os.system(f'csvs-to-sqlite {csvFilesPattern} {cinetvDbPath}') != 0:
    #     return
    os.system(f'csvs-to-sqlite {csvFilesPattern} {cinetvDbPath}')

    cinetvExtDbPath = path.join(outputdir, f'cinetv-{date}-ext.db')
    cinetvExtAutoDbPath = path.join(outputdir, f'cinetv-{date}-ext-auto.db')
    # Four variants below depending on which optional inputs are given:
    # manual extension csvs (-e) and/or auto-generated extension data (-a).
    if args["-e"] and args["-a"]:
        csvFilesExtPattern = path.join(args['-e'], '*.csv')
        print(
            f'[INFO] Converting CineTV CSV files ({csvFilesPattern}) + CineTV extension CSV files ({csvFilesExtPattern}) to SQLite database at {cinetvExtDbPath}'
        )
        os.system(
            f'csvs-to-sqlite {csvFilesPattern} {csvFilesExtPattern} {cinetvExtDbPath}'
        )

        print(
            f'[INFO] Updating CineTV automatically generated extension data...'
        )
        os.system(
            f'cinetvlinking-exe filmo -d {cinetvExtDbPath} -o {args["-a"]}')
        os.system(
            f'cinetvlinking-exe nom apply -d {cinetvExtDbPath} -o {args["-a"]}'
        )

        csvFilesExtAutoPattern = path.join(args['-a'], '*.csv')
        print(
            f'[INFO] Converting CineTV CSV files ({csvFilesPattern}) + CineTV extension CSV files ({csvFilesExtPattern}) + CineTV automatically generated CSV files {csvFilesExtAutoPattern} to SQLite database at {cinetvExtAutoDbPath}'
        )
        # os.system(f'csvs-to-sqlite --replace-tables {csvFilesPattern} {csvFilesExtPattern} {csvFilesExtAutoPattern} {cinetvExtAutoDbPath}')
        os.system(
            f'csvs-to-sqlite {csvFilesPattern} {csvFilesExtPattern} {cinetvExtAutoDbPath}'
        )
        for f in glob(path.join(args['-a'], '*.csv')):
            tablename = path.splitext(path.basename(f))[0]
            os.system(
                f'csvs-to-sqlite -pk NomID -t {tablename} {f} {cinetvExtAutoDbPath}'
            )

        print(
            f'[INFO] Creating a public subset of CineTV database {cinetvExtAutoDbPath} at {outputdir}'
        )
        os.system(f'cinetv2public-exe -s {cinetvExtAutoDbPath} -d {outputdir}')

    elif args["-e"] and not args["-a"]:
        csvFilesExtPattern = path.join(args['-e'], '*.csv')
        print(
            f'[INFO] Converting CineTV CSV files ({csvFilesPattern}) + CineTV extension CSV files ({csvFilesExtPattern}) to SQLite database at {cinetvExtDbPath}'
        )
        os.system(
            f'csvs-to-sqlite {csvFilesPattern} {csvFilesExtPattern} {cinetvExtDbPath}'
        )

        print(
            f'[INFO] Creating a public subset of CineTV database {cinetvExtDbPath} at {outputdir}'
        )
        os.system(f'cinetv2public-exe -s {cinetvExtDbPath} -d {outputdir}')

    elif not args["-e"] and args["-a"]:
        print(
            f'[INFO] Updating CineTV automatically generated extension data...'
        )
        os.system(f'cinetvlinking-exe filmo -d {cinetvDbPath} -o {args["-a"]}')
        os.system(
            f'cinetvlinking-exe nom apply -d {cinetvDbPath} -o {args["-a"]}')

        csvFilesExtAutoPattern = path.join(args['-a'], '*.csv')
        print(
            f'[INFO] Converting CineTV CSV files ({csvFilesPattern}) + CineTV automatically generated CSV files {csvFilesExtAutoPattern} to SQLite database at {cinetvExtAutoDbPath}'
        )
        os.system(
            f'csvs-to-sqlite {csvFilesPattern} {csvFilesExtAutoPattern} {cinetvExtAutoDbPath}'
        )

        print(
            f'[INFO] Creating a public subset of CineTV database {cinetvExtAutoDbPath} at {outputdir}'
        )
        os.system(f'cinetv2public-exe -s {cinetvExtAutoDbPath} -d {outputdir}')

    else:
        print(
            f'[INFO] Creating a public subset of CineTV database {cinetvDbPath} at {outputdir}'
        )
        os.system(f'cinetv2public-exe -s {cinetvDbPath} -d {outputdir}')

    cinetvPublicDbPath = path.join(outputdir, f'cinetv-{date}-publique.db')

    print("[INFO] Exporting SQLite database to CSV files...")
    cinetvCsvPath = path.join(outputdir, "csv")
    Path(cinetvCsvPath).mkdir(parents=True, exist_ok=True)
    os.system(
        f'sqlite-dump-to-csv --db {cinetvPublicDbPath} --output {cinetvCsvPath}'
    )

    print("[INFO] Generating a tarball from CSV files...")
    cinetvCsvTarPath = path.join(outputdir, f'cinetv-{date}-csv.tar.gz')
    with tarfile.open(cinetvCsvTarPath, "w:gz") as tar:
        for f in glob(f'{cinetvCsvPath}/*'):
            tar.add(f, arcname=f"cinetv-{date}/{path.basename(f)}")

    print("[INFO] Generating a tarball from SQLite DB file...")
    cinetvPublicDbTarPath = path.join(outputdir,
                                      f'cinetv-{date}-sqlite.tar.gz')
    with tarfile.open(cinetvPublicDbTarPath, "w:gz") as tar:
        tar.add(cinetvPublicDbPath, arcname=f"cinetv-{date}/cinetv-{date}.db")
Beispiel #26
0
# Command-line interface: both the input workbook and the knp term list
# are required.
parser = argparse.ArgumentParser()
parser.add_argument('-i',
                    '--input',
                    metavar='FILENAME',
                    help='name of input file (REQUIRED)',
                    required=True)
parser.add_argument('-k',
                    '--knp',
                    metavar='FILENAME',
                    help='The list of knp terms (REQUIRED)',
                    required=True)
args = parser.parse_args()

#convert the input xlsx file to csv
try:
    Xlsx2csv(args.input, outputencoding="utf-8").convert("temp.csv")
except FileNotFoundError:
    sys.exit('Unable to open {}. Does the file exist?'.format(args.input))
except PermissionError:
    sys.exit('Unable to open {}. Check your file permissions.'.format(
        args.input))
except Exception:
    # bug fix: a bare ``except:`` would also trap SystemExit and
    # KeyboardInterrupt; Exception keeps the catch-all fallback intact
    sys.exit('Something broke trying to convert the excel file into csv.')

#dump the knp terms into a list
try:
    knp_terms = list()
    with open(args.knp, 'r') as knp_list:
        content = knp_list.readlines()
        for line in content:
            knp_terms.append(line)
Beispiel #27
0
from xlsx2csv import Xlsx2csv

MIN_SALARY = 15080

# NOTE(review): ``sys``, ``wget`` and ``pd`` are not imported in the visible
# header — confirm they come from elsewhere in this file.
name = sys.argv[1]
url = sys.argv[2]

# download data
print("Downloading data for " + name)
xlsx_path = wget.download(url, out="data/{}.xlsx".format(name))

# convert to csv
print()
print("Converting data/{}.xlsx to data/{}.csv".format(name, name))
csv_path = "data/{}.csv".format(name)
Xlsx2csv(xlsx_path, outputencoding="utf-8").convert(csv_path)

# save to db
print("Saving {} to database".format(name))
print("Reading file: " + csv_path)
sheet = pd.read_csv(csv_path)
print("Renaming columns...")
# normalize source column names to the names used downstream
sheet.rename(
    columns={
        "PERIOD_OF_EMPLOYMENT_START_DATE": "EMPLOYMENT_START_DATE",
        "PREVAILING_WAGE_1": "PREVAILING_WAGE",
    },
    inplace=True,
)

PERSON_FIELDS = [
import pandas as pd
from xlsx2csv import Xlsx2csv
# Create empty table
path = r'C:/Users/H395978/AppData/Local/Programs/Thesis/stamm/H164632.GLOBAL/'
#output_table.to_csv(path+'export_router_0001.csv',encoding='utf-8-sig',sep=',',index=False)
# Convert the assembly export workbook into a local "assy.csv" file.
Xlsx2csv(path + 'export_assy_0001.xlsx',
         outputencoding="utf-8").convert("assy.csv")
import pandas as pd
from xlsx2csv import Xlsx2csv
import sys

# Day to process, taken from the command line (e.g. a date string).
daytoprocess = sys.argv[1]

print("Convert XLSX to CSV")

Xlsx2csv("../data/reports/xlsx/" + daytoprocess + ".xlsx",
         outputencoding="utf-8").convert("../data/reports/csv/" +
                                         daytoprocess + ".csv")

print("Load csv")

# header=3: pandas uses the fourth file row as the column header row
df = pd.read_csv("../data/reports/csv/" + daytoprocess + ".csv", header=3)

print("Drop useless column")

# NOTE(review): a set literal is passed where a list is conventional — it
# works, but confirm 'Libellé A21' is the only column meant to be dropped.
df = df.drop(columns={'Libellé A21'})

# French department codes (including Corsica 2A/2B and overseas 971-976),
# plus 'ND' (not determined) and a 'total' bucket.
dep = [
    '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12',
    '13', '14', '15', '16', '17', '18', '19', '21', '22', '23', '24', '25',
    '26', '27', '28', '29', '2A', '2B', '30', '31', '32', '33', '34', '35',
    '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47',
    '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59',
    '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71',
    '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83',
    '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95',
    '971', '972', '973', '974', '976', 'ND', 'total'
]
Beispiel #30
0
    if not csv_path or not xlsx_path:
        raise Exception("Falta una ruta")
    f_ = open(csv_path, 'r', encoding='latin-1')
    df = pd.read_csv(f_, sep=sep)
    df.to_excel(xlsx_path, index=None, header=with_header)
    f_.close()

if module == "xlsxToCsv":
    # Convert an xlsx workbook to csv using an optional custom delimiter.
    csv_path = GetParams("csv_path")
    xlsx_path = GetParams("xlsx_path")
    delimiter = GetParams("delimiter")

    try:
        if not delimiter:
            delimiter = ","  # default when no delimiter is configured
        Xlsx2csv(xlsx_path, outputencoding="utf-8",
                 delimiter=delimiter).convert(csv_path)
    except Exception:
        PrintException()
        # bare ``raise`` re-raises the active exception with its original
        # traceback intact (``raise e`` is the non-idiomatic form)
        raise

if module == "countColumns":

    excel = GetGlobals("excel")

    sheet = GetParams("sheet")
    result = GetParams("var_")

    try:
        excel_path = excel.file_["default"]["path"]
        print(excel_path)
        df = pd.read_excel(excel_path, sheetname=sheet)