class HadoopExporter(BaseItemExporter):
    """Scrapy item exporter that JSON-encodes items and appends them to a
    Hadoop sequence file through a ``SeqWriter``.

    The ``hadoop`` config object's ``username`` (a dotted string) determines
    where output lands: dots become path separators for the directory and
    underscores for the file name.
    """

    def __init__(self, hadoop, **kwargs):
        """Open a sequence-file writer under ``SEQFILE_DIR``.

        :param hadoop: config object providing ``username``
            (e.g. ``"a.b.c"`` -> dir ``a/b/c``, file ``a_b_c``).
        :param kwargs: forwarded to :class:`ScrapyJSONEncoder`.
        """
        # NOTE(review): deliberately does not call super().__init__(); the
        # exporter attributes BaseItemExporter would configure are set
        # manually below — confirm this matches the Scrapy version in use.
        self.encoder = ScrapyJSONEncoder(**kwargs)
        self.encoding = 'utf-8'
        self.fields_to_export = None
        self.export_empty_fields = False
        # "a.b.c" -> directory suffix "a/b/c", sequence-file name "a_b_c"
        self.writer = SeqWriter(
            os.path.join(Utils.settings['SEQFILE_DIR'],
                         hadoop.username.replace(".", "/")),
            hadoop.username.replace(".", "_"))

    def close_file(self):
        """Close the underlying sequence-file writer."""
        print("close")
        self.writer.close()

    def start_exporting(self):
        # No per-run header needed for sequence files.
        pass

    def finish_exporting(self):
        # Cleanup happens in close_file(), not here.
        pass

    def export_item(self, item):
        """Serialize *item* to JSON and write it keyed by ``item['key']``,
        falling back to ``item['url']`` when no explicit key is set."""
        value = self.encoder.encode(dict(self._get_serialized_fields(item)))
        self.writer.writeData(
            item['key'] if 'key' in item else item['url'],
            value)