예제 #1
0
class HadoopExporter(BaseItemExporter):
    """Item exporter that JSON-encodes items and hands them to a SeqWriter.

    Each exported item becomes one ``(key, value)`` record: the key is the
    item's ``'key'`` field when present, otherwise its ``'url'`` field, and
    the value is the JSON encoding of the item's serialized fields.
    """

    def __init__(self, hadoop, **kwargs):
        """Create the JSON encoder and the sequence-file writer.

        Args:
            hadoop: Object exposing a dotted ``username`` attribute
                (e.g. ``"a.b"``). The dots are turned into ``/`` to build
                the output directory under ``Utils.settings['SEQFILE_DIR']``
                and into ``_`` for the second ``SeqWriter`` argument
                (presumably a file-name prefix; confirm against SeqWriter).
            **kwargs: Forwarded unchanged to ``ScrapyJSONEncoder``.
        """
        self.encoder = ScrapyJSONEncoder(**kwargs)
        # Exporter options are assigned directly instead of going through the
        # base-class initializer; presumably they are read by the inherited
        # _get_serialized_fields helper used in export_item.
        self.encoding = 'utf-8'
        self.fields_to_export = None
        self.export_empty_fields = False

        seqfile_root = Utils.settings['SEQFILE_DIR']
        username = hadoop.username
        self.writer = SeqWriter(
            os.path.join(seqfile_root, username.replace(".", "/")),
            username.replace(".", "_"),
        )

    def close_file(self):
        """Print a ``close`` marker and close the underlying writer."""
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3; the bare print statement is Python 2 only.
        print("close")
        self.writer.close()

    def start_exporting(self):
        """No-op: nothing needs to be written before the first item."""
        pass

    def finish_exporting(self):
        """No-op: the writer is released by close_file(), not here."""
        pass

    def export_item(self, item):
        """Encode *item* as JSON and write it as one keyed record.

        Args:
            item: Mapping-like item. Must contain ``'key'`` or ``'url'``;
                a ``KeyError`` propagates if both are missing.
        """
        value = self.encoder.encode(dict(self._get_serialized_fields(item)))
        # Prefer an explicit record key; fall back to the item's URL.
        record_key = item['key'] if 'key' in item else item['url']
        self.writer.writeData(record_key, value)
예제 #2
0
 def __init__(self, hadoop, **kwargs):
     """Set up the JSON encoder, exporter options and the SeqWriter.

     ``kwargs`` are passed straight to ``ScrapyJSONEncoder``. The writer
     target is derived from ``hadoop.username``: dots become ``/`` for the
     directory below ``Utils.settings['SEQFILE_DIR']`` and ``_`` for the
     second ``SeqWriter`` argument.

     NOTE: an earlier variant used a ``file_write`` connection to
     ``hadoop.ip`` / ``hadoop.port``; that code path was already disabled
     (commented out) and is not reproduced here.
     """
     self.encoder = ScrapyJSONEncoder(**kwargs)

     # Fixed exporter options.
     self.encoding = 'utf-8'
     self.fields_to_export = None
     self.export_empty_fields = False

     # Resolve the output location, then open the writer on it.
     seqfile_root = Utils.settings['SEQFILE_DIR']
     account = hadoop.username
     target_dir = os.path.join(seqfile_root, account.replace(".", "/"))
     self.writer = SeqWriter(target_dir, account.replace(".", "_"))