def shammun_dbf_files(root_folder = 'shammun_geography_dbf', division_file='division.dbf',
                      district_file='district.dbf', upazila_file='upazila.dbf'):
    division_file_path = os.path.join(root_folder, division_file)
    district_file_path = os.path.join(root_folder, district_file)
    upazila_file_path = os.path.join(root_folder, upazila_file)

    division, district, upazila = set(), set(), set()
    under_division, under_district = defaultdict(list), defaultdict(list)
    for record in DBF(division_file_path, parserclass=MyFieldParser):
        div = shamnun_get_div(record)
        division.add(div)

    print(division)
    for record in DBF(district_file_path, parserclass=MyFieldParser):
        div = shamnun_get_div(record)
        if div not in division: raise Exception('Incorrect division in district.dbf', div)
        dist = shamnun_get_dist(record)
        district.add(dist)
        under_division[div].append(dist)

    print(district)
    for record in DBF(upazila_file_path, parserclass=MyFieldParser):
        div = shamnun_get_div(record)
        if div not in division: raise Exception('Incorrect division in upazila.dbf', div)
        dist = shamnun_get_dist(record)
        if dist not in district: raise Exception('Incorrect district in upazila.dbf', dist)
        upa = shamnun_get_upa(record)
        upazila.add(upa)
        under_district[dist].append(upa)
    print(upazila)
# shammun_dbf_files()
Example #2
def dbf2csv(dbfname, csvname):
    # Build lookup tables from the exported dictionary files.
    reader = csv.reader(open('OUCHNSYS.EAS_DIC_GENDER.txt', 'r', encoding='utf-16'))
    gender = {rows[1]: rows[0] for rows in reader}
    reader = csv.reader(open('OUCHNSYS.EAS_DIC_ETHNICNAME.txt', 'r', encoding='utf-16'))
    ethnic = {rows[1]: rows[0] for rows in reader}
    reader = csv.reader(open('OUCHNSYS.EAS_DIC_POLITICALSTATUS.txt', 'r', encoding='utf-16'))
    polica = {rows[1]: rows[0] for rows in reader}
    with open(csvname, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, quoting=csv.QUOTE_NONE, quotechar='', escapechar='\\')
        table = DBF(dbfname)
        table.encoding = 'gb18030'
        fidnames = table.field_names
        for row in table:
            dic = {key: '' for key in fidnames}

            # print('begin')
            # Copy each field, translating the coded columns
            # (XB via gender, MZ via ethnic, ZZMM via polica).
            for x in fidnames:
                dic[x] = row[x]
                if x == 'XB':
                    dic[x] = gender[row[x]]
                elif x == 'MZ':
                    dic[x] = ethnic[row[x]]
                elif x == 'ZZMM':
                    dic[x] = polica[row[x]]
            csvlist = [dic[x] for x in ['KSH', 'XH', 'XM', 'CSRQ', 'SFZH', 'MZ', 'ZZMM', 'XB']]
            wr.writerow(csvlist)
Example #3
def TRMM():
    workbook = xlwt.Workbook(encoding='ascii')
    worksheet = workbook.add_sheet('IMERG')
    worksheet1 = workbook.add_sheet('TRMM')
    worksheet2 = workbook.add_sheet('RG')
    for y in range(15, 18):
        dbf = DBF("F:/MSA/RAIN/YE/" + "IYE" + str(2000 + y) + ".dbf",
                  load=True)
        worksheet.write((y - 15), 0, dbf.records[0]['CenterX'])
        worksheet.write((y - 15), 1, dbf.records[0]['CenterY'])
        worksheet.write((y - 15), 2, dbf.records[0]['XStdDist'])
        worksheet.write((y - 15), 3, dbf.records[0]['YStdDist'])
        worksheet.write((y - 15), 4, dbf.records[0]['Rotation'])

        dbf1 = DBF("F:/MSA/RAIN/YE/" + "TYE" + str(2000 + y) + ".dbf",
                   load=True)
        worksheet1.write((y - 15), 0, dbf1.records[0]['CenterX'])
        worksheet1.write((y - 15), 1, dbf1.records[0]['CenterY'])
        worksheet1.write((y - 15), 2, dbf1.records[0]['XStdDist'])
        worksheet1.write((y - 15), 3, dbf1.records[0]['YStdDist'])
        worksheet1.write((y - 15), 4, dbf1.records[0]['Rotation'])

        dbf2 = DBF("F:/MSA/RAIN/YE/" + "RYE" + str(2000 + y) + ".dbf",
                   load=True)
        worksheet2.write((y - 15), 0, dbf2.records[0]['CenterX'])
        worksheet2.write((y - 15), 1, dbf2.records[0]['CenterY'])
        worksheet2.write((y - 15), 2, dbf2.records[0]['XStdDist'])
        worksheet2.write((y - 15), 3, dbf2.records[0]['YStdDist'])
        worksheet2.write((y - 15), 4, dbf2.records[0]['Rotation'])
    out = "F:/MSA/RAIN/" + "test.xls"
    workbook.save(out)
Example #4
def parser(request):
    """
    Автор:Козлов Даниил
    Цель: Обновление БД
    :param request:
    :return: обновленную базу данных в фромате DataFrame
    """
    result = requests.get(
        'https://cbr.ru/banking_sector/otchetnost-kreditnykh-organizaciy/')
    html = result.text
    soup = BeautifulSoup(html, 'html.parser')
    # read the old database (stored as CSV)
    data = pd.read_csv('BD.csv')
    # fall back to the old database if no new reporting period is handled below
    final = data
    # parse the CBR site to check whether data for a given reporting date exist; if not, s == []
    s2 = soup.find_all('a', href="/vfs/credit/forms/102-20200401.rar")
    s1 = soup.find_all('a', href="/vfs/credit/forms/102-20200101.rar")
    s3 = soup.find_all('a', href="/vfs/credit/forms/102-20200701.rar")
    s4 = soup.find_all('a', href="/vfs/credit/forms/102-20201001.rar")
    if s1 == []:
        print('No data for 01.01.2020')
    elif s2 == []:
        print('No data for 04.01.2020')
    elif s3 == []:
        print('No data for 07.01.2020')
        # Load the data for the previous period
        # The rar archive must be downloaded from the CBR site at https://cbr.ru/vfs/credit/forms/102-20200401.rar
        # and the two files NP1 and P1 extracted from it
        table1 = DBF('22020NP1.DBF', load=True, encoding='cp866')
        frame1 = pd.DataFrame(iter(table1))
        table2 = DBF('22020_P1.DBF', load=True, encoding='cp866')
        frame2 = pd.DataFrame(iter(table2))
        frame2 = frame2.fillna(0)
        result = pd.merge(frame1, frame2, on='REGN')
        new = result.groupby(['NAME_B']).sum().reset_index()
        new = new.drop(['REGN'], axis=1)
        new_result = pd.merge(new, frame1, on='NAME_B')
        new_result['DT'] = '2020-04-01'
        final = pd.concat([new_result, data], ignore_index=True)
        final.to_csv('BD.csv')
    elif s4 == []:
        # The rar archive must be downloaded from the CBR site at https://cbr.ru/vfs/credit/forms/102-20200701.rar
        # and the two files NP1 and P1 extracted from it
        print('No data for 10.01.2020')
        table1 = DBF('32020NP1.DBF', load=True, encoding='cp866')
        frame1 = pd.DataFrame(iter(table1))
        table2 = DBF('32020_P1.DBF', load=True, encoding='cp866')
        frame2 = pd.DataFrame(iter(table2))
        frame2 = frame2.fillna(0)
        result = pd.merge(frame1, frame2, on='REGN')
        new = result.groupby(['NAME_B']).sum().reset_index()
        new = new.drop(['REGN'], axis=1)
        new_result = pd.merge(new, frame1, on='NAME_B')
        new_result['DT'] = '2020-07-01'
        final = pd.concat([new_result, data], ignore_index=True)
        final.to_csv('BD.csv')
    return final
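The comments above leave downloading and unpacking the CBR archive as a manual step. Here is a minimal sketch of that step, assuming the third-party requests and rarfile packages (rarfile also needs an unrar backend installed); the helper name and defaults are illustrative only:

import requests
import rarfile


def fetch_cbr_archive(url='https://cbr.ru/vfs/credit/forms/102-20200401.rar',
                      archive_name='102-20200401.rar'):
    # Download the .rar archive published by the CBR.
    response = requests.get(url)
    response.raise_for_status()
    with open(archive_name, 'wb') as f:
        f.write(response.content)
    # Extract every member, including the NP1 and P1 DBF files.
    with rarfile.RarFile(archive_name) as archive:
        archive.extractall()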
Example #5
def test_missing_memofile():
    with raises(MissingMemoFile):
        DBF('tests/cases/no_memofile.dbf')

    # This should succeed.
    table = DBF('tests/cases/no_memofile.dbf', ignore_missing_memofile=True)

    # Memo fields should be returned as None.
    record = next(iter(table))
    assert record['MEMO'] is None
Example #6
    def __init__(self, filename, encoding=None, ignorecase=True,
                 lowernames=False, parserclass=FieldParser,
                 recfactory=collections.OrderedDict,
                 load=False, raw=False, ignore_missing_memofile=False,
                 char_decode_errors='strict'):

        if isinstance(filename, str):
            DBF.__init__(self, filename, encoding=encoding, ignorecase=ignorecase,
                         lowernames=lowernames, parserclass=parserclass,
                         recfactory=recfactory, load=load,
                         raw=raw, ignore_missing_memofile=ignore_missing_memofile,
                         char_decode_errors=char_decode_errors)
        else:
            self.encoding = encoding
            self.ignorecase = ignorecase
            self.lowernames = lowernames
            self.parserclass = parserclass
            self.raw = raw
            self.ignore_missing_memofile = ignore_missing_memofile
            self.char_decode_errors = char_decode_errors

            if recfactory is None:
                self.recfactory = lambda items: items
            else:
                self.recfactory = recfactory

            self.name = None
            self.filename = None
            self.content = filename

            self._records = None
            self._deleted = None

            # Filled in by self._read_headers()
            self.memofilename = None
            self.header = None
            self.fields = []       # namedtuples
            self.field_names = []  # strings

            obj = BytesIO(filename)
            self._read_header(obj)
            self._read_field_headers(obj)
            self._check_headers()

            try:
                self.date = datetime.date(expand_year(self.header.year),
                                          self.header.month, self.header.day)
            except ValueError:  # pragma: no cover
                # Invalid date or '\x00\x00\x00'.
                self.date = None

            self.memofilename = self._get_memofilename()

            if load:
                self.load()
Example #7
def download_table_dbf(file_name):
    """
    Realiza o download de um arquivo auxiliar de dados do SINASC em formato "dbf" ou de uma pasta
    "zip" que o contém (se a pasta "zip" já não foi baixada), em seguida o lê como um objeto pandas
    DataFrame e por fim o elimina

    Parâmetros
    ----------
    file_name: objeto str
        String do nome do arquivo "dbf"

    Retorno
    -------
    df: objeto pandas DataFrame
        Dataframe que contém os dados de um arquivo auxiliar de dados originalmente em formato "dbf"
    """

    ftp = FTP('ftp.datasus.gov.br')
    ftp.login()
    fname = file_name + '.DBF'

    try:
        if file_name == 'CNESDN18':  # This "dbf" file is expected to be in the
            # ".../dissemin/publicos/SINASC/NOV/TABELAS/" directory of the "ftp" server
            ftp.cwd('/dissemin/publicos/SINASC/NOV/TAB/')
            folder = 'NASC_NOV_TAB.zip'
            ftp.retrbinary(f'RETR {folder}', open(folder, 'wb').write)
            zip = ZipFile(folder, 'r')
            zip.extract(fname)

        elif file_name == 'rl_municip_regsaud':
            folder = 'base_territorial.zip'
            ftp.cwd('/territorio/tabelas/')
            ftp.retrbinary(f'RETR {folder}', open(folder, 'wb').write)
            zip = ZipFile(folder, 'r')
            fname = file_name + '.dbf'
            zip.extract(fname)

        else:
            ftp.cwd('/dissemin/publicos/SINASC/NOV/TABELAS/')
            ftp.retrbinary(f'RETR {fname}', open(fname, 'wb').write)
    except:
        raise Exception(f'Could not access {fname}.')

    if ((file_name == 'CNESDN18') or (file_name == 'TABOCUP')
            or (file_name == 'CID10')):
        dbf = DBF(fname, encoding='iso-8859-1')
    else:
        dbf = DBF(fname)

    df = pd.DataFrame(iter(dbf))

    os.unlink(fname)

    return df
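A possible call, assuming the DATASUS FTP layout still matches the docstring; the variable name is illustrative:

df_cid10 = download_table_dbf('CID10')
print(df_cid10.head())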
Example #8
def convertDBF(sqliteCon, dbfFilename, dbfTableName, tableDescription):
    convertedFields = []
    cursor = sqliteCon.cursor()
    cursor.execute("BEGIN TRANSACTION")
    dbfFields = DBF(dbfFilename).fields
    createString = "CREATE TABLE '" + dbfTableName + "' ('fid' INTEGER PRIMARY KEY AUTOINCREMENT "
    firstField = True
    for fieldno in range(len(dbfFields)):
        # add column
        field = dbfFields[fieldno]
        convertedFields.append(field)
        createString += ','
        createString += "'" + field.name + "' "
        createFieldTypeString = "TEXT"
        if (field.type == 'F' or field.type == 'O' or field.type == 'N'):
            createFieldTypeString = "REAL"
        elif (field.type == 'I'):
            createFieldTypeString = "INTEGER"
        firstField = False
        createString += createFieldTypeString
    createString += ")"
    #print(createString)

    cursor.execute(createString)

    contentsString = "insert into gpkg_contents (table_name,data_type,identifier,description,last_change) VALUES(?,'attributes',?,?,strftime('%Y-%m-%dT%H:%M:%fZ','now'))"
    contentsAttrs = (dbfTableName, dbfTableName,
                     dbfTableName + " " + tableDescription)
    cursor.execute(contentsString, contentsAttrs)

    for record in DBF(dbfFilename):
        #print(record)
        insertValues = []
        insertValuesString = ""
        insertString = ""

        for key, value in record.items():
            if (len(insertString) > 0):
                insertString += ","
                insertValuesString += ","
            else:
                insertString = "INSERT INTO " + dbfTableName + " ("
                insertValuesString += " VALUES ("
            insertString += key
            insertValues.append(value)
            insertValuesString += "?"
        insertValuesString += ")"
        insertString += ") "
        insertString += insertValuesString
        #print(insertString)
        cursor.execute(insertString, tuple(insertValues))
    cursor.execute("COMMIT TRANSACTION")
    return convertedFields
Example #9
 def __init__(self, barcode, barcodes, liqcode):
     self.price = None
     self.qty = None
     self.brand = ''
     self.desc = ''
     self.barcode = barcode.upper()
     self.codeNum = ''
     self.singlePrice = None
     self.casePrice = None
     self.barcodes = DBF(barcodes, encoding='latin-1')
     self.liqcode = DBF(liqcode, encoding='latin-1')
     self.deposit = None
     self.dep = False
     self.found = False
Example #10
def readDBF(dbfFilename):
    cNameRecords = {}

    dbfFields = DBF(dbfFilename).fields

    for record in DBF(dbfFilename, load=True):
        recordFields = {}

        for field in record.keys():
            recordFields[field] = record[field]
            #print(record)

        cNameRecords[record['CNAM']] = recordFields

    return cNameRecords
Example #11
def read_dbf(dbf_file):
    class MyFieldParser(FieldParser):
        def parse(self, field, data):
            try:
                return FieldParser.parse(self, field, data)
            except ValueError:
                return None
    try:
        dbf = DBF(dbf_file)
        table = pd.DataFrame(iter(dbf))
    except ValueError:
        dbf = DBF(dbf_file, parserclass=MyFieldParser)
        table = pd.DataFrame(iter(dbf))
    table.rename(columns={column: column.lower() for column in table.columns}, inplace=True)
    return table
Example #12
def main():
    """
    Step 1.
      Creating copies of a file using names from an excel sheet

    """
    file_to_copy_name = "some.file"
    path_to_copy_file = os.path.join(get_input_dir(), file_to_copy_name)

    workbook_path = os.path.join(get_input_dir(),
                                 "workbook_with_file_names.xlsx")
    excel_workbook = xlrd.open_workbook(workbook_path)
    sheet_with_file_names = excel_workbook.sheet_by_index(0)
    file_names = sheet_with_file_names.row_values(0)
    print("found these file-names in excel", file_names)

    for name in file_names:
        output_directory = get_output_dir()
        path_to_destination_file = os.path.join(output_directory, name)
        shutil.copyfile(src=path_to_copy_file, dst=path_to_destination_file)
        print("successfully wrote [%s] to [%s]" \
              %(path_to_copy_file, path_to_destination_file))
    """
    Step 2.
      Editing .dbf files in two directories
      I can't test this as I don't have .dbf files :/
      The idea is just to iterate over the 'zone' files, read a row and write
      that row to the 'surroundings' file
    """
    dbf_zones_directory = os.path.join(get_input_dir(), "zone_files")
    dbf_surroundings_directory = os.path.join(get_input_dir(),
                                              "surroundings_files")
    zone_files = os.listdir(dbf_zones_directory)

    for zone in zone_files:
        zone_file_path = os.path.join(dbf_zones_directory, zone)
        surrounding_file_path = \
            os.path.join(dbf_surroundings_directory, zone.replace("zone", "surroundings"))
        assert os.path.exists(zone_file_path), \
            "missing .dbf zone file at absolute path [%s]" % zone_file_path

        zone_table = DBF(zone_file_path)
        for record in zone_table:
            print(record)

        surrounding_table = DBF(surrounding_file_path)
        for record in surrounding_table:
            print(record)
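Step 2 only reads and prints the records, and dbfread itself cannot write .dbf files. Below is a stand-in sketch that appends the zone records to a CSV placed next to each surroundings file instead; the helper name and the '_merged.csv' suffix are hypothetical:

import csv
import os

from dbfread import DBF


def copy_zone_records_to_csv(zone_file_path, surrounding_file_path):
    # Read all records from the zone table.
    zone_table = DBF(zone_file_path, load=True)
    # dbfread is read-only, so the rows go into a CSV next to the
    # surroundings file rather than into the .dbf itself.
    csv_path = os.path.splitext(surrounding_file_path)[0] + '_merged.csv'
    with open(csv_path, 'w', newline='') as out:
        writer = csv.DictWriter(out, fieldnames=zone_table.field_names)
        writer.writeheader()
        for record in zone_table:
            writer.writerow(dict(record))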
Example #13
    def merge_dbf(file1, file2):

        table1 = DBF(file1)
        records1 = list(table1)

        table2 = DBF(file2)
        records2 = list(table2)

        # Delete unnecessary fields from lists
        for records in [records1, records2]:

            for record in records:
                del record['INTERRUPT']
                if record['Suma opadu'] < 0:
                    record['Suma opadu'] = 0.0

        # Check whether it is possible to connect the lists:
        # take the last record and add one hour to its time to get the
        # expected next record (measurements are made every hour)
        last_record = records1[-1]
        next_hour = str(int(last_record['TIME'][:2]) + 1)
        new_day = last_record['DATE']
        if len(next_hour) == 1:
            next_hour = '0' + next_hour

        # Roll over to a new day after hour 23
        elif next_hour == '24':
            next_hour = '00'
            new_day = last_record['DATE'] + datetime.timedelta(days=1)
        else:
            pass

        # Specify the date and time to search for
        search_date = new_day
        search_time = next_hour + last_record['TIME'][2:]

        # Find the record in the second list matching that date and time
        merged_list = []  # stays empty if the lists cannot be connected
        for index, record in enumerate(records2):
            if (record['DATE'] == search_date
                    and record['TIME'] == search_time):
                print("Possible to connect")
                # TODO Check this case
                records2 = records2[index:]
                merged_list = records1 + records2
                break

        return merged_list
Example #14
    def process(self, products, overwrite=False, **kwargs):
        for asset_type, asset in self.assets.items():
            if asset_type != _cdlmkii:  # with older cdl products, the asset is the product
                continue

            fname = self.temp_product_filename(_cdl, _cdlmkii)
            fname_without_ext, _ = os.path.splitext(fname)

            with ZipFile(asset.filename, 'r') as zipfile:
                for member in zipfile.infolist():
                    member_ext = member.filename.split('.', 1)[1]
                    extracted = zipfile.extract(member, fname_without_ext)
                    os.rename(extracted, fname_without_ext + '.' + member_ext)

            image = GeoImage(fname, True)
            image[0].SetNoData(0)
            image = None

            image = gdal.Open(fname, gdal.GA_Update)
            dbf = DBF(fname + '.vat.dbf')
            for i, record in enumerate(dbf):
                image.SetMetadataItem(str("CLASS_NAME_%s" % record['CLASS_NAME']), str(i))
            image = None

            archive_fp = self.archive_temp_path(fname)
            self.AddFile(_cdl, _cdl, archive_fp)
Example #15
def extract_files(data_folder, filters):
    corrupted_files = []
    database_folder = os.path.join(data_folder, '.database')
    os.makedirs(database_folder, exist_ok=True)
    data_files = glob.glob(
        os.path.join(data_folder, '*.dbc'))
    for filename in tqdm(data_files):
        converted = dbc2dbf_single(filename)
        if not converted:
            continue
        db = converted['database']
        database = os.path.join(database_folder, db)
        try:
            dbf = DBF(converted['filename'])
        except ValueError:
            corrupted_files.append(converted['filename'])
            continue
        except Exception as e:
            # print(f"Problem file {filename}")
            continue
        if not dbf:
            continue
        try: 
            df = pd.DataFrame(dbf, columns=dbf.field_names)
            df['month'] = converted['month']
            df['year'] = converted['year']
            df['uf'] = converted['uf']
            with pd.HDFStore(database) as hdf:
                if db in hdf.keys():
                    hdf.append(db, df, data_columns=True)
                else:
                    hdf.put(db, df, data_columns=True)
        except Exception as e:
            print(e)
Example #16
def import_db(db):
    try:
        for record in DBF(db):
            values = get_values(record, fields)
            print(insert.format(*values))
    except:
        pass
Example #17
def read_dbc(filename, signature='utf-8'):
    """
    Descompacta um arquivo "dbc" para "dbf", em seguida o lê como tal e por fim o converte em um objeto
    pandas DataFrame e elimina os dois arquivos.

    Parâmetros
    ----------
    filename: objeto str
        String do nome do arquivo "dbc"
    signature: objeto str
        String do nome do formato de encoding do arquivo "dbc"

    Retorno
    -------
    df: objeto pandas DataFrame
        Dataframe que contém os dados de um arquivo principal de dados originalmente em formato "dbc"
    """

    if isinstance(filename, str):
        filename = filename.encode()
    with NamedTemporaryFile(delete=False) as tf:
        dbc2dbf(filename, tf.name.encode())
        dbf = DBF(tf.name, encoding=signature)
        df = pd.DataFrame(list(dbf))
    os.unlink(tf.name)
    os.unlink(filename)

    return df
Example #18
 def __init__(self, dbf_fname, ano, encoding="iso-8859-1"):
     """
     Instantiates a SINAN object, loading it from the given file
     :param dbf_fname: Name of the Sinan dbf file
     :param ano: Year of the data
     :return:
     """
     self.ano = ano
     self.dbf = DBF(dbf_fname, encoding=encoding)
     self.colunas_entrada = self.dbf.field_names
     self.tabela = pd.DataFrame(list(self.dbf))
     self.tabela.drop_duplicates('NU_NOTIFIC', keep='first', inplace=True)
     if "ID_MUNICIP" in self.tabela.columns:
         self.geocodigos = self.tabela.ID_MUNICIP.dropna().unique()
     elif "ID_MN_RESI" in self.tabela.columns:
         # print(self.tabela.columns)
         self.geocodigos = self.tabela.ID_MN_RESI.dropna().unique()
         self.tabela["ID_MUNICIP"] = self.tabela.ID_MN_RESI
         del self.tabela['ID_MN_RESI']
     self._parse_date_cols()
     if not (self.time_span[0].year == self.ano
             and self.time_span[1].year == self.ano):
         raise ValidationError(
             _("This file contains notifications that are incompatible with the "
               "notification year provided. Please make sure the notification year "
               "is the same for every record in the file and that it matches the "
               "year given at upload time."))
Example #19
def read_one_dbf_file(rows, path):
    table = DBF(path, load=True)
    times = 0

    for record in table.records:
        if times > 10:
            break
        row = []
        # change category type to number
        if record['CATEGORY'] not in category_to_number:
            continue
        row.append(category_to_number[record['CATEGORY']])
        # print(category_to_number[record['CATEGORY']])
        row.append(record['CALL_GROUP'])

        if record['final_case'] not in final_case_type_to_number:
            print('missed final_case type:')
            print(record['final_case'])
        row.append(final_case_type_to_number[record['final_case']])

        row.append(record['CASE_DESC'])
        row.append(record['occ_date'])
        row.append(record['x_coordina'])
        row.append(record['y_coordina'])

        row.append(record['census_tra'])
        #times = times + 1
        #print row
        rows.append(row)
    return rows
Example #20
    def dbf_to_xlsx(self):
        """
        Opens and read DBF file, using specified encoding in file_encoding
        variable, then 
        """

        workbook = xlsxwriter.Workbook('C:\\\\zakaz\\zakaz_dbf.xlsx')
        worksheet = workbook.add_worksheet()
        #formats
        header_format = workbook.add_format({'bold': False})

        column_letter = 65

        with DBF('C:\\\\zakaz\\1.dbf',
                 encoding=self.file_encoding) as dbf_table:
            heads = list(list(dbf_table)[0].keys())
            for head in heads:
                worksheet.write(
                    str(chr(column_letter)) + '1', head, header_format)
                column_letter += 1

            row = 1
            col = 0
            for record in dbf_table:
                for head in heads:
                    worksheet.write(row, col, record[head])
                    col += 1
                col = 0
                row += 1
Example #21
def dbf_open(infile, load=False, verbose=False):
    ''' dbf_open() - Open a DBF file

    Parameters
    ----------
    infile : str
        Name of existing DBF file
    
    load : bool, default=False
        Read into memory?
    
    verbose : bool, default=False
        Turn command-line output on or off

    Returns
    -------
    db: obj
        Database

    '''
    from dbfread import DBF

    db = DBF(infile, load=load)
    if verbose:
        print(f'   Opened file {infile}, contains {len(db):,} records')
    return db
Example #22
def data_view(file_path,
              worksheet,
              nb_rows_beg,
              nb_rows_end,
              nb_col_max=10,
              **kwargs):

    # pd.set_option('display.height', 1000)
    # pd.set_option('display.max_rows', 2500)
    # pd.set_option('display.max_columns', 1500)
    # pd.set_option('display.width', 2000)

    if is_correct_kwargs({'type': ['excel', 'csv', 'geo']}, kwargs) == True:
        print(True)
    else:
        print(False)

    # set_max_columns(nb_col_max)
    type_of_file = kwargs.get("type")

    if type_of_file == "excel":
        results = pd.read_excel(file_path, sheet_name=worksheet)
    elif type_of_file == "csv":
        results = pd.read_csv(file_path)
    elif type_of_file == "geo":
        dbf = DBF(file_path)
        results = pd.DataFrame(iter(dbf))
    else:
        results = pd.read_csv(file_path)

    head = results.head(nb_rows_beg)
    tail = results.tail(nb_rows_end)
    display_dataframe(head, tail)
Example #23
def geoms(input: 'Name of the files to be converted (dbf, shp)',
          output: 'Name of the output GeoJSON file' = 'geometries') -> int:
    """
    Convert ArcGIS data files into a GeoJSON output

    The input files must all have the same name

    Ex: python factory.py geoms resources/precinct_files/May2016Precinct_region.shp
    """
    # Remove the file ext
    input = '.'.join(input.split('.')[:-1])
    # Import meta data and shapes
    dbf = DBF(input + '.dbf')
    shapes = shapefile.Reader(input + '.shp').shapes()
    if len(dbf) != len(shapes):
        raise Exception('Files do not have the same number of elements')
    # Format shape files into an ID key dict of GeoJSON objects
    features = []
    for meta, shape in zip(dbf, shapes):
        if shape.shapeType == shapefile.POLYGONZ:
            features.append({
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [shape.points]
                },
                'properties': {k.lower(): v
                               for k, v in meta.items()}
            })
    # Export as a JSON file
    json.dump({
        'type': 'FeatureCollection',
        'features': features
    }, open(output + '.geojson', 'w'))
    return 0
Example #24
 def createAttributeTable(self, dbf_path, field_name):
     attr_table = {}
     for row in DBF(dbf_path):
         if len(row) < 3:
             return None
         items = list(row.items())
         attr_table.update({items[0][1]: [items[-1][1]]})
     return attr_table
Example #25
def get_NY_StreetsGraph():

    dir  = "/home/ingared/Documents/NS_IP/UndergroundData"
    ny_streets = "NY_STREETS.dbf"

    g = nx.DiGraph()

    count = 0
    test1 = DBF(os.path.join(dir,ny_streets))
    print " Started reading NY Street data\n"

    for record in test1:

        edge_id = record['edge_ID']
        fn_id = record['FN_ID']
        tn_id = record['TN_ID']
        length = record['length']

        # print(fn_id, tn_id, length, edge_id)

        if not g.has_node(fn_id):
            g.add_node(fn_id)
        if not g.has_node(tn_id):
            g.add_node(tn_id)
        g.add_weighted_edges_from([(fn_id, tn_id, length)], edge_id=edge_id)
        g.add_weighted_edges_from([(tn_id, fn_id, length)], edge_id=edge_id)

        count += 1
        if count % 1000 == 0:
            print("No of edges read :", count)

    print('\n')
    print(" Total Edges in Streets  : ", count)
    print('\n')
    return g
Example #26
def concat_dfs(list_df_names, list_state_names, list_of_paths):
    """ Concatenate passed dataframes to create bigger ones.
    Args:
        list_df_names: list with the names of the resulting dfs. They are the keys
                    of the resulting dictionary where the resulting dfs will be
                    stored
        list_state_names: list with the state names (Nacional included or not)
        list_of_paths: list with the paths where the data to create the 
                       auxiliary dataframes is located
        
    """
    dic_of_aux_dfs = {}
    dic_final_dfs = {}
    x = 0
    for l in list_df_names:
        dic_of_aux_dfs[l] = {
            k: pd.DataFrame(iter(DBF(v)))
            for k, v in zip(list_state_names, list_of_paths[x])
        }

        dic_final_dfs[l] = pd.concat(dic_of_aux_dfs[l])

        x += 1

    return dic_final_dfs
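A short usage sketch for concat_dfs; the names, states, and .dbf paths below are purely hypothetical:

# Hypothetical inputs: one output name, two states, and one list of .dbf paths per output.
df_names = ['nacimientos']
states = ['Aguascalientes', 'Nacional']
paths = [['data/nac_ags.dbf', 'data/nac_nacional.dbf']]

final_dfs = concat_dfs(df_names, states, paths)
print(final_dfs['nacimientos'].shape)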
Example #27
def draw(magnitudeDictionary, title):
    plt.close()

    sf = shp.Reader("Shape/county.shp", )
    table = DBF('Shape/county.dbf')

    maxVal = max(magnitudeDictionary.values())

    names = []
    for record in table:
        names.append(record["COUNTY"])

    plt.figure()
    plt.suptitle(title)

    # numShapes = len(sf.shapeRecords())
    if maxVal != 0:
        step = 1 / maxVal
    else:
        step = 0

    current = 0
    for shape in sf.shapeRecords():
        curMag = magnitudeDictionary[names[current]]

        c = (curMag * step, 1 - curMag * step, 0)
        x = [i[0] for i in shape.shape.points[:]]
        y = [i[1] for i in shape.shape.points[:]]
        plt.fill(x, y, color=c)
        plt.plot(x, y, color='black')
        current += 1
    plt.show()


# draw()
Example #28
def data_info(file_path):
    if get_extension(file_path) == "dbf":
        print("allo")

    # results = pd.read_excel("commune.xls",sheet_name="Donnees")
    # head = results.head(5)
    # tail = results.tail(5)

    # print(head)
    # print(".. | .."+"\n.. | .."+"\n.. | .."+"\n.. v ..")
    # print(tail)

    # result1 = results.loc[0][0]
    # result2 = results.loc[0][1]

    # print(type(result2))
    # print("[{}][{}] --> {}".format(0,0,result1))
    # print("[{}][{}] --> {}".format(0,1,result2))

    print(type('00.1'))

    dbf = DBF("81-.dbf")
    # print("{}".format(dbf.fields))
    # print(dbf.records[0])

    print("type" + str(ast.literal_eval('1')))
    print("{}".format(type(ast.literal_eval('200.0'))))
    print(type(df.dtypes))

    # Number of rows in .dbf file
    print(len(dbf))

    # Number of columns in .dbf file
    print(len(dbf.fields))
Example #29
def get_CID10_table(cache=True):
    """
    Fetch the CID10 table
    :param cache:
    :return:
    """
    ftp = FTP('ftp.datasus.gov.br')
    ftp.login()
    ftp.cwd('/dissemin/publicos/SIM/CID10/TABELAS')
    fname = 'CID10.DBF'
    cachefile = os.path.join(CACHEPATH,
                             'SIM_' + fname.split('.')[0] + '_.parquet')
    if os.path.exists(cachefile):
        df = pd.read_parquet(cachefile)
        return df
    try:
        ftp.retrbinary('RETR {}'.format(fname), open(fname, 'wb').write)
    except:
        raise Exception('Could not download {}'.format(fname))
    dbf = DBF(fname, encoding='iso-8859-1')
    df = pd.DataFrame(list(dbf))
    if cache:
        df.to_parquet(cachefile)
    os.unlink(fname)
    return df
Example #30
def get_dbf_counts(folder_path=None):
    if not folder_path:
        return
    else:
        files = [f for f in os.listdir(os.path.join(".", folder_path))
                 if f[-10:] == 'Counts.dbf']
    return DBF(os.path.join(folder_path, files[0]))
Example #31
def dbf2eod(filepath, obj):
    hasil = []
    gagal = 0
    JML = 0
    DQTY = 0
    DTTL = 0
    filename = os.path.basename(filepath)
    linking = '<a href="/felino/eod/' + filename + '">' + filename + '</a>'

    objects = http.request.env['felino.eoddetail'].search([], limit=30)
    for item in DBF(filepath, encoding='iso-8859-1'):
        bisa = ['PLU', 'D%1', 'RTN', 'VOD', 'DS1']
        tampil = False
        if item['FLAG'] in bisa:
            tampil = True
            JML = JML + 1
            idx = bisa.index(item['FLAG'])
            if item['FLAG'] == 'RTN':
                QTY = -1 * item['QTY']
            elif item['FLAG'] == 'D%1':
                QTY = 0
            elif item['FLAG'] == 'DS1':
                QTY = 0
            else:
                QTY = item['QTY']
            DQTY = DQTY + QTY
            DTTL = DTTL + (QTY * item['PRICE'])
            product = http.request.env['felino.felino'].search([('barcode', '=', item['CODE'])])
            eod = {'name': filename, 'code': item['CODE'], 'barcode': item['CODE'],
                   'desc': item['DESC'], 'qty': QTY, 'price': item['PRICE'],
                   'norcp': item['NORCP'], 'etype': item['ETYPE'], 'flag': item['FLAG'],
                   'cprice': item['CPRICE'], 'hide': tampil, 'category': product['catagory']}
            objects.sudo().create(eod)
            hasil.append(eod)
    eod = {'name': filename, 'link': linking, 'Child': JML, 'Child1': DQTY, 'totalsales': DTTL}
    obj.sudo().create(eod)
    return hasil
Example #32
def get_fields(db: str) -> List[str]:
    fields = list()
    record = next(iter(DBF(db)))
    for name in record.keys():
        fields.append(name)
    return sorted(fields)
Example #33
import sys

from dbfread import DBF

kshcsv = sys.argv[1]
xhlist = []
f1 = open(kshcsv, 'r')
i = 0
for line in f1:
    if len(line) > 0:
        xhlist.append(line[0:-1])
        i = i + 1
    # print(line[0:-1])
print("total:{}".format(i))


f=open("exec_{}.sql".format(kshcsv),'w+')
table0 = DBF('ZXSMD_0000_50z.dbf')
table0.encoding = 'gb18030'
for row in table0:
    # print("{},{}".format(str(row['XH']), str(row['XH']) in xhlist))
    if str(row['XH']) in xhlist:
        print("{},{}".format(row['XH'], row['KSH']))
        print("update eas_schroll_student set examno='{}' where studentcode='{}';".format(row['KSH'], row['XH']), file=f)
        i = i - 1


table = DBF('ZXSMD_0000_50b.dbf')
table.encoding = 'gb18030'
for row in table:
    if str(row['XH']) in xhlist:
        print("{},{}".format(row['XH'], row['KSH']))
        print("update eas_schroll_student set examno='{}' where studentcode='{}';".format(row['KSH'], row['XH']), file=f)
Example #34
"""
Return records as named tuples.

This saves a lot of memory.
"""
from collections import namedtuple
from dbfread import DBF

table = DBF('files/people.dbf', lowernames=True)

# Set record factory. This must be done after
# the table is opened because it needs the field
# names.
Record = namedtuple('Record', table.field_names)
factory = lambda lst: Record(**dict(lst))
table.recfactory = factory

for record in table:
    print(record.name)