Beispiel #1
0
def writeTDE(q, outputFileName, types):
    # Define type maps
    typesTable = {
        'Bool': tde.Type.BOOLEAN,
        'Int': tde.Type.INTEGER,
        'Double': tde.Type.DOUBLE,
        'Date': tde.Type.DATE,
        'DateTime': tde.Type.DATETIME,
        'Duration': tde.Type.DURATION,
        'CharString': tde.Type.CHAR_STRING,
        'UnicodeString': tde.Type.UNICODE_STRING
    }

    # Step 1: Create the Extract file and open the .csv

    tdefile = tde.Extract(outputFileName + '.tde')

    if tdefile.hasTable('Extract'):
        table = tdefile.openTable('Extract')
        tableDef = table.getTableDefinition()
        columns = q.get()
        numColumns = len(columns)

    else:
        # Step 2: Create the tableDef
        tableDef = tde.TableDefinition()
        columns = q.get()
        numColumns = len(columns)

        for i in range(numColumns):
            tableDef.addColumn(columns[i], typesTable[types[i]])

        # Step 3: Create the table in the image of the tableDef

        table = tdefile.addTable('Extract', tableDef)

    # Step 4: Loop through the csv, grab all the data, put it into rows
    # and insert the rows into  the table

    newrow = tde.Row(tableDef)

    while True:
        nextLine = q.get()

        if nextLine:
            for columnIndex in range(numColumns):
                insertRow(newrow, columnIndex, nextLine[columnIndex],
                          types[columnIndex])
            table.insert(newrow)
        else:
            break

    # Step 5: Close the tde

    tdefile.close()
def write_tde(table_df, tde_fullpath, arg_append):
    """
    Writes the given pandas dataframe to the Tableau Data Extract given by tde_fullpath
    """
    if arg_append and not os.path.isfile(tde_fullpath):
        print "Couldn't append -- file doesn't exist"
        arg_append = False

    # Remove it if already exists
    if not arg_append and os.path.exists(tde_fullpath):
        os.remove(tde_fullpath)
    tdefile = tde.Extract(tde_fullpath)

    # define the table definition
    table_def = tde.TableDefinition()

    # create a list of column names
    colnames = table_df.columns
    # create a list of column types
    coltypes = table_df.dtypes

    # for each column, add the appropriate info the Table Definition
    for col_idx in range(0, len(colnames)):
        cname = colnames[col_idx]
        ctype = fieldMap[str(coltypes[col_idx])]
        table_def.addColumn(cname, ctype)

    # create the extract from the Table Definition
    if arg_append:
        tde_table = tdefile.openTable('Extract')
    else:
        tde_table = tdefile.addTable('Extract', table_def)
    row = tde.Row(table_def)

    for r in range(0, table_df.shape[0]):
        for c in range(0, len(coltypes)):
            if str(coltypes[c]) == 'float64':
                row.setDouble(c, table_df.iloc[r, c])
            elif str(coltypes[c]) == 'float32':
                row.setDouble(c, table_df.iloc[r, c])
            elif str(coltypes[c]) == 'int64':
                row.setDouble(c, table_df.iloc[r, c])
            elif str(coltypes[c]) == 'int32':
                row.setDouble(c, table_df.iloc[r, c])
            elif str(coltypes[c]) == 'object':
                row.setString(c, table_df.iloc[r, c])
            elif str(coltypes[c]) == 'bool':
                row.setBoolean(c, table_df.iloc[r, c])
            else:
                row.setNull(c)
        # insert the row
        tde_table.insert(row)

    tdefile.close()
    print "Wrote %d lines to %s" % (len(table_df), tde_fullpath)
Beispiel #3
0
def pd_tde(df,fname):
	try:
		os.system('rm -f ' + fname)
		os.system('rm -f DataExtract*.log')
		tdefile = tde.Extract(fname)
	except:
		tdefile = tde.Extract(fname)

	tableDef = tde.TableDefinition()
	colnames = df.columns
	coltypes = df.dtypes

	# for each column, add the appropriate info the Table Definition
	for i in range(0, len(colnames)):
		cname = colnames[i]
    		ctype = fieldMap.get(str(coltypes[i]))
    		tableDef.addColumn(cname, ctype)  		

	with tdefile as extract:
		table = extract.addTable("Extract", tableDef)
	
		for r in range(0, df.shape[0]):
        		row = tde.Row(tableDef)
        	for c in range(0, len(coltypes)):
            		if str(coltypes[c]) == 'float64':
            		    row.setDouble(c, df.iloc[r,c])
            		elif str(coltypes[c]) == 'float32':
            		    row.setDouble(c, df.iloc[r,c])
            		elif str(coltypes[c]) == 'int64':
            		    row.setDouble(c, df.iloc[r,c])   
            		elif str(coltypes[c]) == 'int32':
            		    row.setDouble(c, df.iloc[r,c])
            		elif str(coltypes[c]) == 'object':
            		    row.setString(c, df.iloc[r,c]) 
            		elif str(coltypes[c]) == 'bool':
            		    row.setBoolean(c, df.iloc[r,c])
            		else:
            		    row.setNull(c)
        	# insert the row
        	table.insert(row)
Beispiel #4
0
        # if we have a header, we don't want to try and process it
        if hasheader == True:
            csvreader.next()
        print '[',
        for row in csvreader:

            if rowoutput == True:  # row deets, else just '.'
                print '************** INSERTING ROW NUMBER: ' + str(
                    rowsinserted) + '**************'  # debug output
            else:  # only print dot every 50 records
                if (rowsinserted % dotsevery) == 0:
                    print '=',

            columnposition = 0
            newrow = tde.Row(tableDef)

            for t in range(numfields):
                fieldtype = dtypes[t].replace("<type '", "").replace(
                    "'>",
                    "").replace("<class '",
                                "").replace('NoneType',
                                            'str').replace('uuid.UUID', 'str')
                fieldname = dfields[t]

                if rowoutput == True:  # column deets
                    print str(columnposition) + ' ' + fieldname + ':   ' + str(
                        row[fieldname]) + ' (' + str(fieldtype).split(
                            '.')[0] + ')'  # debug output

                if fieldtype == 'str':
def extract(file_name):
    # move file to /extract
    # cd to /extract
    # if there is no extract called TRACK_TERM then create one, otherwise append to TRACK_TERM
    # define data model for extract

    global WORKING_DIRECTORY, TRACK_TERM

    from_path = WORKING_DIRECTORY + '/' + file_name
    to_path = WORKING_DIRECTORY + '/extract/' + file_name

    os.rename(from_path, to_path)

    os.chdir(WORKING_DIRECTORY + '/extract')

    # define the extract
    with tde.Extract(TRACK_TERM + '.tde') as extract:

        tableDef = tde.TableDefinition()

        # define the columns and the data types in the extract
        tableDef.addColumn('lang', tde.Type.CHAR_STRING)  #0
        tableDef.addColumn('sentiment', tde.Type.DOUBLE)  #1
        tableDef.addColumn('country', tde.Type.CHAR_STRING)  #2
        tableDef.addColumn('created_at', tde.Type.DATETIME)  #3
        tableDef.addColumn('tweet_text', tde.Type.CHAR_STRING)  #4
        tableDef.addColumn('Longitude', tde.Type.DOUBLE)  #5
        tableDef.addColumn('source', tde.Type.CHAR_STRING)  #6
        tableDef.addColumn('user', tde.Type.CHAR_STRING)  #7
        tableDef.addColumn('Latitude', tde.Type.DOUBLE)  #8

        table = None

        if not extract.hasTable('Extract'):
            # Table does not exist, so create it.
            print "Creating a new extract"
            table = extract.addTable('Extract', tableDef)
        else:
            # Table exists, so append the new data.
            print "Appending to an existing extract"
            table = extract.openTable('Extract')

        new_row = tde.Row(tableDef)

        # read the data from the CSV into the extract row object
        with open(file_name, 'r') as inf:
            reader = csv.DictReader(inf, delimiter=',', lineterminator='\n')
            for row in reader:

                # insert data into the row object in the correct order as defined above
                new_row.setCharString(0, row['lang'])

                sentiment = float(row['sentiment'])
                new_row.setDouble(1, sentiment)

                new_row.setCharString(2, row['country'])

                # parse the twitter date string:
                # Mon Sep 21 11:03:53 +0000 2015
                # %a %b %d %H:%M:%S +0000 %Y
                date_object = datetime.strptime(row['created_at'],
                                                '%a %b %d %H:%M:%S +0000 %Y')
                year = int(datetime.strftime(date_object, '%Y'))
                month = int(datetime.strftime(date_object, '%m'))
                day = int(datetime.strftime(date_object, '%d'))
                hour = int(datetime.strftime(date_object, '%H'))
                min = int(datetime.strftime(date_object, '%M'))
                sec = int(datetime.strftime(date_object, '%S'))
                frac = 0  # fractions of a second aka milliseconds
                new_row.setDateTime(3, year, month, day, hour, min, sec, frac)

                new_row.setCharString(4, row['tweet_text'])

                # check if there is a value for longitude, otherwise write a 0
                try:
                    longitude = float(row['longitude'])
                except:
                    longitude = 0
                new_row.setDouble(5, longitude)

                new_row.setCharString(6, row['source'])
                new_row.setCharString(7, row['user'])

                # check if there is a value for latitude, otherwise write a 0
                try:
                    latitude = float(row['latitude'])
                except:
                    latitude = 0
                new_row.setDouble(8, latitude)

                table.insert(new_row)

    # if the process fails we want to be able to re-run it without collisions between file names
    # so give each file a unique name (unix time stamp in this case).
    os.rename(file_name, str(time.time()).split('.')[0] + '.csv')

    # cd back to working directory
    os.chdir(WORKING_DIRECTORY)

    return
def createTDEFile():
    # This part is unique for each extract.
    # Will need to modify..
    # TODO: Make this more dynamic
    ################################################################################################
    ################################################################################################

    csvSchema = []
    csvSchema.append({'<COLUMNNAME>': tdeTypes['INTEGER']})

    ################################################################################################
    ################################################################################################

    # Try to create the TDE File
    try:
        tdeFile = tde.Extract(outputfile + '.tde')

    except:
        os.remove(outputfile + '.tde')
        tdeFile = tde.Extract(outputfile + '.tde')

    # Open newly created CSV File
    csvFile = open(outputfile + '.csv', "r")
    reader = csv.reader(csvFile)
    print 'Reading records from %s' % (outputfile + '.csv')

    # Create TDE Table definition
    tdeTableDef = tde.TableDefinition()

    # Build TDE Table Definition from csv schema above
    print 'Defined table schema:'
    for index, item in enumerate(csvSchema):
        for k, v in item.items():
            print 'Column %i: %s <%s>' % (
                index, k, tdeTypes.keys()[tdeTypes.values().index(v)])
            tdeTableDef.addColumn(k, v)

    # Add table to TDE File
    tdeTable = tdeFile.addTable("Extract", tdeTableDef)

    # Iterate through rows and columns of csv and adding them to TDE File
    print 'Writing records to %s' % (csvName + '.tde')

    rownum = 0
    for row in reader:
        if rownum == 0:
            header = row
        else:
            colnum = 0
            tdeRow = tde.Row(tdeTableDef)
            for col in row:
                if colnum + 1 > len(csvSchema):
                    print 'Something is missing here.'
                    break
                add_tde_col(colnum, tdeRow, row[colnum],
                            csvSchema[colnum].values()[0])
                colnum += 1
            tdeTable.insert(tdeRow)
            tdeRow.close()
        rownum += 1

    print '%i rows added in total in %f seconds' % (rownum - 1,
                                                    time.clock() - startTime)
    print 'Closing TDE and CSV File...'
    tdeFile.close()
    csvFile.close()
Beispiel #7
0
# step 03 create a blank extract
# #########################################################
dataExtract = tde.Extract('irisExtract.tde')

# step 04 define schema
# #########################################################
dataSchema = tde.TableDefinition()
dataSchema.addColumn('sepal-length', tde.Type.DOUBLE)
dataSchema.addColumn('sepal-width', tde.Type.DOUBLE)
dataSchema.addColumn('petal-length', tde.Type.DOUBLE)
dataSchema.addColumn('petal-width', tde.Type.DOUBLE)
dataSchema.addColumn('class', tde.Type.CHAR_STRING)

# step 05 connect schema with blank extract
# #########################################################
table = dataExtract.addTable('Extract', dataSchema)

# step 06 fill extract with data
# #########################################################
newRow = tde.Row(dataSchema)
for i in range(0, len(dataset)):
    newRow.setDouble	(0, dataset['sepal-length'][i])
    newRow.setDouble		(1, dataset['sepal-width'][i])
    newRow.setDouble		(2, dataset['petal-length'][i])
    newRow.setDouble		(3, dataset['petal-width'][i])
    newRow.setCharString	(4, dataset['class'][i])
    table.insert(newRow)

# step 07 close the extract
# #########################################################
dataExtract.close()