# Modules assumed from the later examples: 'file' from _Archive, 'db' from db_handling
from _Archive import file
from db_handling import db


def processtransfile(location, sourcefile, source, user_id):

    myfile = file.File(sourcefile, location)

    mycontent = myfile.get_cleaned_content()
    testdb = db.Db_handler_user('finance', "nightsky78","Wolfpack",\
                       "192.168.2.221",\
                '5432', user_id )

    mysource = testdb.retrieve_source_values(source)
    sourcelength = mysource[0][2]
    sourcedelimiter = mysource[0][1]
    ger_to_us_col = mysource[0][5]
    incl_column = [mysource[0][3],mysource[0][4],mysource[0][5]]
    # Every column that is not explicitly included gets excluded
    excl_column = [i for i in range(sourcelength) if i not in incl_column]

    # No keyword-based exclusions for this source (see Example #4, e.g. ["Wertstellung"])
    exclusions = []

    cleancontent = myfile.get_structured_content(mycontent, sourcedelimiter, sourcelength,
                                                 excl_column, exclusions, ger_to_us_col)

    hashedcontent = myfile.add_hash(cleancontent)

    # Matching stub: tag every row with the placeholder category 'Unknown'
    new_hashed_content = []
    for line in hashedcontent:
        line = line + ';Unknown'
        new_hashed_content.append(line)


    testdb.store_finance_data(new_hashed_content, source)
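
Both this example and the next one index into the tuple returned by retrieve_source_values() with magic numbers. Below is a minimal sketch of the layout those indices imply; the constant names are assumptions inferred from how the examples use each field, not the actual database schema.

# Assumed field positions in a row returned by retrieve_source_values()
SOURCE_DELIMITER = 1   # delimiter used in the source file
SOURCE_LENGTH = 2      # number of columns in the source file
DATE_COLUMN = 3        # column holding the booking date
SUBJECT_COLUMN = 4     # column holding the transaction subject
AMOUNT_COLUMN = 5      # column converted from German to US number format
SOURCE_ID = 6          # database id of this source
SKIP_ROWS = 7          # header rows to skip when reading the file
ADD_COLUMN = 8         # extra column merged into the subject

mysource = testdb.retrieve_source_values(source)
sourcedelimiter = mysource[0][SOURCE_DELIMITER]
sourcelength = mysource[0][SOURCE_LENGTH]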
Example #2
import pandas

from db_handling import db
# Module providing the pandas-based reader used below; the import path is assumed
import read_csv_pandas


def processtransfile(location, sourcefile, source, user_id):

    # First we need to retrieve the source configuration for this sourcefile

    myDB = db.Db_handler_user(user_id)

    mysource = myDB.retrieve_source_values(source)
    sourcelength = mysource[0][2]
    sourcedelimiter = mysource[0][1]
    ger_to_us_col = mysource[0][5]
    skiprow = mysource[0][7]
    incl_column = [mysource[0][3], mysource[0][4], mysource[0][5]]
    subject_column = mysource[0][4]
    add_column = mysource[0][8]
    date_column = mysource[0][3]
    source_id = mysource[0][6]

    print('Source config read: {0}'.format(mysource))
    # Determine which columns to drop when reading the file with pandas
    excl_columns = [x for x in range(sourcelength) if x not in incl_column]

    # Read the file contents via the pandas-based reader
    MyContent = read_csv_pandas.ReadCsvPandas(sourcefile, location)

    content = MyContent.get_pandas_content(skiprow, excl_columns,
                                           sourcedelimiter, add_column,
                                           subject_column)

    # Check if oldest entry in Excel file is newer than newest entry in Database for this sourcefile
    # Get newest entry from database
    newest_from_db = myDB.select_from_db(
        'transactions', 'max(date)', 'source_id = {0}'.format(source_id))[0][0]

    # Get the oldest entry from the sourcefile (min, not max, so any overlap is caught)
    oldest_from_file_source = pandas.to_datetime(content[date_column],
                                                 dayfirst=True).min()

    # If the oldest sourcefile entry is newer than the newest entry in the database, continue;
    # otherwise abort.
    # This is not a good way to import data, as it forces me to remember the exact timeframe of the last import.
    # For now it is OK, but it would be better to check during import whether a row is a duplicate
    # and only insert it into the DB if it is not (see the sketch after this example).
    if oldest_from_file_source > pandas.to_datetime(newest_from_db):
        # Continue processing the import file

        print("Data is new")
        # Swap ',' for '.' in the amount column (German to US number format)
        new_content = MyContent.ger_to_us(content, ger_to_us_col)

        # Add a hash column at index sourcelength.
        # This makes sure that it is definitely a new column, and the original file's
        # data can still be accessed via its source length.
        hashed_content = MyContent.add_hash_pandas(new_content, sourcelength)

        # Add a column with the placeholder value 'Unknown'.
        # Its pandas column number is sourcelength plus one, so it gets a dedicated new column.
        hashed_content[sourcelength + 1] = "Unknown"

        myDB.store_finance_data_pandas(hashed_content, source)

    else:
        print(
            'Some Datasets have already been imported. Select a sourcefile which does not overlap!'
        )
        exit(99)
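
The comment inside this example notes that a per-row duplicate check during import would be more robust than comparing date ranges. The following is a minimal sketch of that idea, reusing the hash column that add_hash_pandas writes at index sourcelength; it would replace the date-range check inside processtransfile. hash_exists() is a hypothetical helper returning True/False, not an existing Db_handler_user method.

# Sketch only: hash_exists() is assumed, not part of the real Db_handler_user API
hashed_content = MyContent.add_hash_pandas(new_content, sourcelength)
already_stored = hashed_content[sourcelength].apply(myDB.hash_exists)
new_rows = hashed_content[~already_stored].copy()
if not new_rows.empty:
    new_rows[sourcelength + 1] = "Unknown"
    myDB.store_finance_data_pandas(new_rows, source)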
Example #3
from db_handling import db
import configparser

source_config = configparser.ConfigParser()
configfilepath = r'../FinanceSource.config'
source_config.read(configfilepath)

sourcename = source_config.get('fileconf2', 'sourcetype')

print('Adding matching value for source: {0}'.format(sourcename))

name = "Auszug1"
pattern = 'MALERBETRIEB'
pattern_loc = 1
category_id = 16
user_id = 1

# Use a distinct name so the db module is not shadowed
mydb = db.Db_handler_user(user_id)

source_id = mydb.retrieve_source_values(sourcename)

mydb.store_matching_values(name, source_id[0][6], pattern, pattern_loc,
                           category_id)
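
For reference, a hypothetical section of FinanceSource.config that would satisfy the configparser lookup above. Only the sourcetype key is actually read here; the value 'dkbkredit' is borrowed from Example #4, and everything else about the file layout is an assumption.

import configparser

# Stand-in for the real FinanceSource.config contents
example = configparser.ConfigParser()
example.read_string("""
[fileconf2]
sourcetype = dkbkredit
""")
print(example.get('fileconf2', 'sourcetype'))   # -> dkbkredit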
Example #4
from _Archive import file
from db_handling import db

# import file

#location = r'/vagrant/Finance'
location = r'C:\Users\Johannes\OneDrive\Johannes files\Python\FSND-Virtual-Machine\vagrant\Finance'
source = 'dkbkredit'

# Konto (checking account)
#myfile = file.File("15908965.csv",r"C:\Users\q204249\Desktop\Python\Finance")

# Kreditkarte (credit card)
myfile = file.File("4748________0796.csv", location)

testdb = db.Db_handler_user('Finance', "nightsky78","Wolfpack",\
                       "johanneshettigdb.cvadegidr7b8.ap-northeast-1.rds.amazonaws.com",\
                '5432', '1' )

mycontent = myfile.get_cleaned_content()
#        for line in mycontent:
#            print(line)

# Konto (checking account)
#excl_column = [0,2,5,6,8,9,10,11]
#ger_to_us_col = [7]
#exclusions = ["Wertstellung"]
#cleancontent = myfile.get_structured_content(mycontent, ";", 12, excl_column, exclusions, ger_to_us_col)

# Kreditkarte (credit card)
mysource = testdb.retrieve_source_values(source)
sourcelength = mysource[0][2]