def push(format, metainfo, instream, outfile, encoding, delimiter=None, **kwargs):
    """Write a record stream to ``outfile`` as CSV.

    The header row is taken from ``metainfo.fields``; every subsequent item
    of ``instream`` is written as one row until the first ``StreamFooter``
    is encountered (the footer itself is not written).

    Parameters:
        format: not used by this function.
        metainfo: object exposing a ``fields`` sequence (the column names).
        instream: iterable of rows, optionally terminated by a StreamFooter.
        outfile: file-like object handed to ``UnicodeCSVWriter``.
        encoding: output encoding; falls back to ``"utf8"`` when falsy.
        delimiter: optional field delimiter overriding the dialect's own.
        **kwargs: only ``dialect`` is read (defaults to ``default_dialect``).
    """
    # Local imports keep this block self-contained.
    import inspect
    from copy import copy

    if not encoding:
        encoding = "utf8"

    dialect = kwargs.get('dialect', default_dialect)
    if delimiter:
        # Never assign the delimiter on the dialect we were given: it is
        # either the caller's object or the shared module-level default, and
        # mutating it would leak this delimiter into every later call.
        if inspect.isclass(dialect):
            class _DelimitedDialect(dialect):
                pass
            _DelimitedDialect.delimiter = delimiter
            dialect = _DelimitedDialect
        else:
            dialect = copy(dialect)
            dialect.delimiter = delimiter

    writer = UnicodeCSVWriter(outfile, dialect=dialect, encoding=encoding)
    writer.writerow(metainfo.fields)
    for k in instream:
        if isinstance(k, StreamFooter):
            break
        writer.writerow(k)
def push_sql(stream, database_kind, table=None, host=None, create_table=False,
             drop_table=False, protocol=None, database=None, ssh_host=None,
             user=None, password=None, sql_command=None,
             delete_partition=False, **kwargs):
    """Load a record stream into a SQL table through a command-line client.

    NOTE: a second ``push_sql`` with the same signature is defined later in
    this file and replaces this one when the module is imported.

    ``PUSH_DB[database_kind]`` supplies the client command line and the
    query templates. For each ``StreamHeader`` in ``stream`` the optional
    drop / create / delete-partition statements are piped to the client,
    then a loader is started (``import_query`` fed through a ``TempFifo``,
    or ``load_command`` fed through its stdin). Following rows are written
    as CSV to that loader until a ``StreamFooter`` closes it.

    Parameters:
        stream: iterable of StreamHeader, data rows and StreamFooter items.
        database_kind: key into ``PUSH_DB``.
        table: target table; defaults to the header's ``typename``.
        create_table: create the table with one ``varchar(255)`` column per
            header field.
        drop_table: drop the table first.
        database: database name (required).
        user, password: optional credentials appended to the command line.
        delete_partition: delete rows matching the header's ``partition``
            mapping (skipped when ``drop_table`` is set).
        host, protocol, ssh_host, sql_command, **kwargs: accepted but not
            used by this function.

    Raises:
        Exception: if ``database`` is missing, if ``delete_partition`` is
            requested but the header carries no partition, or if the
            database spec has neither ``import_query`` nor ``load_command``.
    """
    db_params = PUSH_DB[database_kind]
    c = db_params['command']
    if user:
        c = c + [db_params['user'] % user]
    if password:
        c = c + [db_params['password'] % password]
    if not database:
        raise Exception("Missing parameter (database)")
    c = c + [database]

    for row in stream:
        if isinstance(row, StreamHeader):
            metainfo = row
            if not table:
                table_name = metainfo.typename
            else:
                table_name = table

            # Setup statements are piped to a dedicated client process.
            p = Popen(c, stdin=PIPE, stdout=None, stderr=None)
            if drop_table:
                drop_table_query = db_params['drop_table'] % table_name
                p.stdin.write(drop_table_query)
                p.stdin.flush()
                # poll() refreshes the exit status; reading ``returncode``
                # alone is always None before poll()/wait() has been called.
                if p.poll():
                    break
            if create_table:
                fields = ','.join([name + ' varchar(255)' for name in metainfo.fields])
                create_table_query = Template(db_params['create_table']).substitute(
                    table=table_name, fields=fields)
                p.stdin.write(create_table_query)
                p.stdin.flush()
                if p.poll():
                    break
            if delete_partition and not drop_table:
                if not metainfo.partition:
                    raise Exception("No partition information available in header: unable to delete partition")
                # NOTE(review): partition values are interpolated into the
                # statement without escaping.
                conditions = ["%s = '%s'" % (k, str(v))
                              for (k, v) in metainfo.partition.items()]
                condition = ' AND '.join(conditions)
                delete_partition_query = Template(db_params['delete_partition']).substitute(
                    table=table_name, condition=condition)
                p.stdin.write(delete_partition_query)
                p.stdin.flush()
                if p.poll():
                    break
            p.stdin.close()
            p.wait()

            writestream = None
            if "import_query" in db_params:
                # The client reads the rows from a temporary fifo whose
                # filename is interpolated into the import statement.
                p = Popen(c, stdin=PIPE, stdout=None, stderr=None)
                tmpfifo = TempFifo()
                import_query = db_params['import_query'] % (tmpfifo.filename, table_name)
                p.stdin.write(import_query)
                p.stdin.flush()
                writestream = tmpfifo.open_write()
            elif 'load_command' in db_params:
                load_command = [Template(s).substitute(table=table_name, database=database)
                                for s in db_params['load_command']]
                print(load_command)
                pp = Popen(load_command, stdin=PIPE, stdout=None, stderr=None)
                writestream = pp.stdin
            else:
                raise Exception("Missing load_command or import_query in db_kind spec")
            writer = UnicodeCSVWriter(writestream, dialect=sql_dialect(), encoding="utf-8")
        elif isinstance(row, StreamFooter):
            # End of data: close the loader's input and wait for it to finish.
            if "import_query" in db_params:
                tmpfifo.close()
                p.stdin.close()
                p.wait()
            elif 'load_command' in db_params:
                pp.stdin.close()
                pp.wait()
        else:
            writer.writerow(row)
def _push_sql_send(proc, query):
    """Pipe ``query`` to ``proc`` and return True if the client has failed.

    ``Popen.returncode`` is only updated by poll()/wait(), so the exit status
    is refreshed with poll() after the statement has been flushed.
    """
    proc.stdin.write(query)
    proc.stdin.flush()
    return bool(proc.poll())


def _push_sql_partition_condition(partition):
    """Build the ``k = 'v' AND ...`` condition for a partition mapping."""
    # NOTE(review): values are interpolated into the statement unescaped.
    return ' AND '.join(
        "%s = '%s'" % (key, str(value)) for (key, value) in partition.items())


def push_sql(stream,
             database_kind,
             table=None,
             host=None,
             create_table=False,
             drop_table=False,
             protocol=None,
             database=None,
             ssh_host=None,
             user=None,
             password=None,
             sql_command=None,
             delete_partition=False,
             **kwargs):
    """Load a record stream into a SQL table through a command-line client.

    ``PUSH_DB[database_kind]`` supplies the client command line and the
    query templates. For each ``StreamHeader`` in ``stream`` the optional
    drop / create / delete-partition statements are piped to the client,
    then a loader is started (``import_query`` fed through a ``TempFifo``,
    or ``load_command`` fed through its stdin). Following rows are written
    as CSV to that loader until a ``StreamFooter`` closes it.

    Parameters:
        stream: iterable of StreamHeader, data rows and StreamFooter items.
        database_kind: key into ``PUSH_DB``.
        table: target table; defaults to the header's ``typename``.
        create_table: create the table with one ``varchar(255)`` column per
            header field.
        drop_table: drop the table first.
        database: database name (required).
        user, password: optional credentials appended to the command line.
        delete_partition: delete rows matching the header's ``partition``
            mapping (skipped when ``drop_table`` is set).
        host, protocol, ssh_host, sql_command, **kwargs: accepted but not
            used by this function.

    Raises:
        Exception: if ``database`` is missing, if ``delete_partition`` is
            requested but the header carries no partition, or if the
            database spec has neither ``import_query`` nor ``load_command``.
    """
    db_params = PUSH_DB[database_kind]
    c = db_params['command']
    if user:
        c = c + [db_params['user'] % user]
    if password:
        c = c + [db_params['password'] % password]
    if not database:
        raise Exception("Missing parameter (database)")
    c = c + [database]

    use_import_query = "import_query" in db_params
    use_load_command = 'load_command' in db_params

    for row in stream:
        if isinstance(row, StreamHeader):
            metainfo = row
            table_name = table if table else metainfo.typename

            # Setup statements are piped to a dedicated client process; the
            # loop is abandoned as soon as that client reports a failure.
            p = Popen(c, stdin=PIPE, stdout=None, stderr=None)
            if drop_table:
                if _push_sql_send(p, db_params['drop_table'] % table_name):
                    break
            if create_table:
                fields = ','.join(
                    [name + ' varchar(255)' for name in metainfo.fields])
                create_table_query = Template(
                    db_params['create_table']).substitute(table=table_name,
                                                          fields=fields)
                if _push_sql_send(p, create_table_query):
                    break
            if delete_partition and not drop_table:
                if not metainfo.partition:
                    raise Exception(
                        "No partition information available in header: unable to delete partition"
                    )
                delete_partition_query = Template(
                    db_params['delete_partition']).substitute(
                        table=table_name,
                        condition=_push_sql_partition_condition(
                            metainfo.partition))
                if _push_sql_send(p, delete_partition_query):
                    break
            p.stdin.close()
            p.wait()

            writestream = None
            if use_import_query:
                # The client reads the rows from a temporary fifo whose
                # filename is interpolated into the import statement.
                p = Popen(c, stdin=PIPE, stdout=None, stderr=None)
                tmpfifo = TempFifo()
                import_query = db_params['import_query'] % (tmpfifo.filename,
                                                            table_name)
                p.stdin.write(import_query)
                p.stdin.flush()
                writestream = tmpfifo.open_write()
            elif use_load_command:
                load_command = [
                    Template(s).substitute(table=table_name, database=database)
                    for s in db_params['load_command']
                ]
                print(load_command)
                pp = Popen(load_command, stdin=PIPE, stdout=None, stderr=None)
                writestream = pp.stdin
            else:
                raise Exception(
                    "Missing load_command or import_query in db_kind spec")
            writer = UnicodeCSVWriter(writestream,
                                      dialect=sql_dialect(),
                                      encoding="utf-8")
        elif isinstance(row, StreamFooter):
            # End of data: close the loader's input and wait for it to finish.
            if use_import_query:
                tmpfifo.close()
                p.stdin.close()
                p.wait()
            elif use_load_command:
                pp.stdin.close()
                pp.wait()
        else:
            writer.writerow(row)