Ejemplo n.º 1
0
def loadData(fpath, dbFunc):
    """Analyze the file at ``fpath`` and hand the result to ``dbFunc``.

    The file is skipped when ``getAllDone()`` already contains ``fpath``.
    Otherwise it is parsed with ``csvAnalyzer.analyzeFile`` and the parsed
    content is passed to ``dbFunc``. When ``dbFunc`` returns a truthy value,
    ``logFile(fpath)`` is called (presumably to record the file as done --
    confirm against ``logFile``/``getAllDone``).

    Args:
        fpath: Path of the file to load.
        dbFunc: Callable that receives the analyzed file content and returns
            a truthy value on success, a falsy value on failure.

    Returns:
        None. Progress and the outcome are reported on stdout.

    Raises:
        NameError: If ``fpath`` does not exist.
    """
    if not os.path.exists(fpath):
        # NameError is kept as-is: existing callers may be catching it.
        raise NameError('Path not exist')

    if getAllDone().count(fpath):
        print('Already Done!')
        return

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('Loading File "%s"...' % os.path.basename(fpath))
    subjCont = csvAnalyzer.analyzeFile(fpath)

    print('Inserting To DB...')
    if dbFunc(subjCont):
        logFile(fpath)
        print('Done!')
    else:
        print('Error, please see logs...')
Ejemplo n.º 2
0
 def __init__(self):
     """Initialize the configuration attributes.

     Sets hard-coded values for the target database, the source files to
     upload, and the data parsing options.
     """
     # 1. Target
     self.db_name = 'postgres'
     self.user_name = 'postgres'
     self.password = '******'
     self.schema_name = 'DB_Schema_Name'
     # Target table name; it will be created from the data source files'
     # columns if it does not exist.
     self.tableName = 'DB_Table_Name'

     # 2. Source
     # Folder with the files to upload to the DB. It is searched recursively
     # for files for which self.filesFormatFunc returns True. Multiple files
     # can be loaded into the same table. After a failure, the next run skips
     # the files that were already uploaded successfully.
     self.dataDir = '~/Downloads/Kaggle/Data'
     # Filter selecting which files from dataDir to upload. The candidate
     # file name must be passed to re.match as the string to test; calling
     # re.match with only the pattern raises TypeError.
     self.filesFormatFunc = lambda f: re.match(r'.*Test_.*\.txt', f) is not None

     # 3. Other tunings and configurations of data parsing and manipulations:
     # Bulk size for uploading to the DB; the same number is also used as the
     # file reading bulk size (topN below). Put None to cancel bulking and
     # load the whole file as one bulk.
     self.upload_bulk_size = 1000
     # Lazy analyzer: self.upload_bulk_size is read when the lambda is called,
     # not when it is defined.
     self.dataAnalyzerFunLazy = lambda f, h: csvAnalyzer.analyzeFile(
         f,
         header=h,
         delimiter=',',  # delimiter for the csv
         manipulationFunc=None,  # dictionary of functions to manipulate fields in the csv (see example 1)
         additionalConstFields=None,  # additional constant fields to add to the table (see example 2)
         topN=self.upload_bulk_size)  # file reading bulk size