Example #1
0
    def open_spider(self, spider):
        """Start a CSV export for the 'fto_branch' spider; do nothing for any other spider."""
        if spider.name != 'fto_branch':
            return

        # Binary read/write mode; the file is truncated on open.
        self.file = open('./output/transactions_alt' + '.csv', 'w+b')
        self.exporter = CsvItemExporter(self.file)
        self.exporter.start_exporting()
Example #2
0
    def spider_closed(self, spider):
        """Write the matched items to ``res.csv`` when the spider closes.

        Each entry of ``self.ordered`` is exported once for every entry of
        ``self.result`` that has the same ``'name'``, so rows follow the
        order of ``self.ordered``.

        The export is best-effort: a failure is logged and swallowed so it
        cannot interrupt spider shutdown.
        """
        import logging

        try:
            # The context manager closes the file even if exporting fails
            # half-way (the previous code leaked the handle in that case).
            with open('res.csv', 'w+b') as file:
                self.exporter = CsvItemExporter(file)
                self.exporter.fields_to_export = [
                    'name', 'image', 'link', 'address'
                ]
                self.exporter.start_exporting()

                for item in self.ordered:
                    for res in self.result:
                        if item['name'] == res['name']:
                            self.exporter.export_item(item)

                self.exporter.finish_exporting()
        except Exception:
            # Previously a bare ``except: pass``: still non-fatal, but the
            # failure is now visible in the log instead of vanishing.
            logging.getLogger(__name__).exception(
                "Failed to export results to res.csv")
Example #3
0
    def open_spider(self, spider):
        """Start a CSV export named after ``spider.stage`` for the 'fto_urls' spider only."""
        if spider.name != 'fto_urls':
            return

        out_path = './output/' + spider.stage + '.csv'
        self.file = open(out_path, 'w+b')
        self.exporter = CsvItemExporter(self.file)
        self.exporter.start_exporting()
 def spider_opened(self, spider):
     """Open '<spider name>_<YYYYMMDD>.csv' and start exporting a fixed column set."""
     day_stamp = datetime.datetime.strftime(datetime.datetime.now(),'%Y%m%d')
     handle = open('%s_%s.csv' % (spider.name, day_stamp), 'w+b')
     self.files[spider] = handle  # kept per spider so it can be looked up later
     exporter = CsvItemExporter(handle)
     # Disabled alternative column set:
     # ['company_name','contact','phone1','phone2','email','average','reviews','address','member_for','based_in']
     exporter.fields_to_export = ['title', 'save', 'desc', 'long_desc', 'image']
     self.exporter = exporter
     exporter.start_exporting()
Example #5
0
 def spider_opened(self, spider):
     """Open the data file belonging to spider 'mcc1' or 'mcc2' and start a CSV export on it.

     For any other spider name no file is opened here; ``self.file`` is used
     as it already is.
     """
     data_paths = {
         'mcc1': 'MicrocapCollector/spiders/data/data1.csv',
         'mcc2': 'MicrocapCollector/spiders/data/data2.csv',
     }
     if spider.name in data_paths:
         self.file = open(data_paths[spider.name], 'w+b')
     exporter = CsvItemExporter(self.file, delimiter=',')
     self.exporter = exporter
     exporter.start_exporting()
Example #6
0
    def spider_opened(self, spider):
        """Prepare the per-spider export state when a spider opens.

        Opens ``data/<name>_latest.csv`` with a started CSV exporter, loads the
        previously stored polls from ``data/<name>_dict.json`` and resets the
        list of new items. All state is stored in dicts keyed by ``spider``.

        Raises:
            ValueError: if the previous-polls JSON file exists but cannot be
                parsed. It is not silently replaced so it can be inspected.
        """
        latest_polls_file = open('data/' + spider.name + '_latest.csv', 'w')
        self.latest_polls_files[spider] = latest_polls_file

        exporter = CsvItemExporter(latest_polls_file,
                                   fields_to_export=spider.fields_to_export)
        exporter.start_exporting()
        self.exporters[spider] = exporter

        prev_polls_fName = 'data/' + spider.name + '_dict.json'
        try:
            # ``with`` closes the handle even when json.load raises; the
            # previous code leaked it on malformed input.
            with open(prev_polls_fName, 'r') as prev_polls_file:
                prev_polls = json.load(prev_polls_file)
        except IOError:
            # The JSON file doesn't exist (or can't be read): start empty.
            prev_polls = []
        except ValueError:
            # The JSON file is malformed; it should be inspected before
            # being overwritten.
            raise ValueError("Malformed prev_polls_file for " + spider.name +
                             ".")
        self.prev_polls_fNames[spider] = prev_polls_fName
        self.prev_polls[spider] = prev_polls

        self.newitems[spider] = []
Example #7
0
class CsdnPipeline(object):
    """Pipeline for the 'csdn.user' and 'csdn.login' spiders.

    Items from 'csdn.user' are appended to ``export_users.csv``; items from
    'csdn.login' that carry a username update ``t_csdn_users`` through
    ``spider.oracle_db``. Items from any spider are passed on unchanged.
    """
    users={}

    def __init__(self):
        # The export file is created lazily, on the first 'csdn.user' item.
        self.file=None
        self.exporter=None

    def set_file(self,filename):
        """Open *filename* for binary writing and attach a CSV exporter to it."""
        self.file=open(filename,'wb')
        self.exporter=CsvItemExporter(self.file)

    def process_item(self, item, spider):
        """Export or persist *item* depending on the spider, then return it."""
        if spider.name=="csdn.user":
            if self.file is None:
                self.set_file("export_users.csv")
            # Export every item, including the one that triggered the file
            # creation (it used to be silently skipped).
            self.exporter.export_item(item)
        if spider.name=="csdn.login":
            if item['username']:
                sql='update t_csdn_users set real_password=:password,real_email=:email where username=:username'
                username=item['username']
                password=item['password']
                email=item['email']
                # Bound parameters, not string formatting, carry the values.
                param={'username':username,'password':password,'email':email}
                spider.oracle_db.execute_sql(sql,param,False)
                # NOTE(review): this line was corrupted in the source
                # ("username:"******"..."); reconstructed as the obvious
                # concatenation. It writes the password to the log in
                # clear text - confirm that is really wanted.
                log.msg("username:"+username+"\tpassword:"+password,level=log.INFO)
        return item
Example #8
0
 def spider_opened(self, spider):
     """Open '<spider name>_items.csv' and start exporting the film columns."""
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     columns = ['id', 'title', 'time', 'director', 'year', 'star', 'cost']
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #9
0
 def spider_opened(self, spider):
     """Open '<spider name>.csv' and start exporting.

     The 'all-coins' and 'historical-data' spiders get an explicit column
     list; for any other spider the exporter's columns are left untouched.
     """
     columns_by_spider = {
         "all-coins": [
             'rank',
             'name',
             'type',
             'symbol',
             'website',
             'market_cap_usd',
             'price_usd',
             'price_btc',
             'volume_24_usd',
             'change_24',
         ],
         "historical-data": [
             'date',
             'open_price',
             'high_price',
             'low_price',
             'close_price',
             'volume',
             'market_cap',
         ],
     }
     handle = open('%s.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     self.exporter = CsvItemExporter(handle)
     if spider.name in columns_by_spider:
         self.exporter.fields_to_export = columns_by_spider[spider.name]
     self.exporter.start_exporting()
Example #10
0
 def spider_opened(self, spider):
     """Open the spider's CSV file (named by ``self.get_file_name``) and start exporting."""
     handle = open(self.get_file_name(spider, "csv"), 'w+b')
     self.files[spider] = handle
     exporter = CsvItemExporter(handle, fields_to_export=self.fields_to_export)
     self.exporter = exporter
     exporter.start_exporting()
Example #11
0
 def spider_opened(self, spider):
     """Open 'douban_tv_hanju.csv', print a trace line and start a CSV export."""
     handle = open('douban_tv_hanju.csv', 'wb+')
     self.files[spider] = handle
     print('==========pipeline==========spider_opened==========')
     exporter = CsvItemExporter(handle)
     self.exporter = exporter
     exporter.start_exporting()
Example #12
0
 def spider_opened(self, spider):
   """Open 'Pets_LasVegas.csv' and start exporting the business-listing columns."""
   handle = open('Pets_LasVegas.csv', 'w+b')
   self.files[spider] = handle
   columns = ['business_name', 'industry_category', 'city', 'state', 'phone_number',
              'street_address', 'website', 'email', 'url', 'count']
   exporter = CsvItemExporter(handle)
   exporter.fields_to_export = columns
   self.exporter = exporter
   exporter.start_exporting()
Example #13
0
    def spider_open(self, spider):
        """Create fully-quoted CSV exporters for carepack.csv and model.csv and start both."""
        carepack_out = open("carepack.csv", "w")
        self.csv_exporter_item = CsvItemExporter(carepack_out, quoting=csv.QUOTE_ALL)
        model_out = open("model.csv", "w")
        self.csv_exporter_detail = CsvItemExporter(model_out, quoting=csv.QUOTE_ALL)

        for exporter in (self.csv_exporter_item, self.csv_exporter_detail):
            exporter.start_exporting()
Example #14
0
    def open_spider(self, spider):
        """Start a CSV export for the 'fto_material' spider; do nothing for any other spider."""
        if spider.name != 'fto_material':
            return

        out_file = open('./output/fto_material.csv', 'w+b')
        self.file = out_file
        self.exporter = CsvItemExporter(out_file)
        self.exporter.start_exporting()
Example #15
0
 def spider_opened(self, spider):
     """Open '<spider name>_items.csv' and start exporting the tree-node columns."""
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = ['id', 'name', 'link', 'index', 'parent_id']
     self.exporter = exporter
     exporter.start_exporting()
Example #16
0
 def spider_opened(self, spider):
     """Open '<spider name>_items.csv' and start exporting the offer columns."""
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     columns = ['title', 'link', 'location', 'original_price', 'price', 'end_date']
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #17
0
 def spider_opened(self, spider):
     """Open '<spider name>_<unix time>.csv' and start an 'excel'-dialect CSV export.

     The column set depends on whether the spider name contains 'yopt' or
     'prices'. For any other name no new exporter is created and the
     existing ``self.exporter`` is started.
     """
     handle = open('%s_%s.csv' % (spider.name, int(time.time())), 'w+b')
     self.files[spider] = handle

     columns = None
     if 'yopt' in spider.name:
         columns = ['date','instrument','option_symbol','symbol','expiration','type','strike','last','change','bid','ask','volume','open_int']
     elif 'prices' in spider.name:
         columns = ['date','open','high','low','close','volume','adj_close']

     if columns is not None:
         self.exporter = CsvItemExporter(handle, fields_to_export=columns, dialect='excel')
     self.exporter.start_exporting()
Example #18
0
 def spider_opened(self, spider):
     """Open the spider's output CSV and start exporting.

     The file name comes from ``spider.output_filename`` (default
     'result_<spider name>.csv') and the columns from
     ``spider.output_fields`` (default None).
     """
     fallback_name = 'result_{}.csv'.format(spider.name)
     handle = open(getattr(spider, 'output_filename', fallback_name), 'w+b')
     self.files[spider] = handle
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = getattr(spider, 'output_fields', None)
     self.exporter = exporter
     exporter.start_exporting()
Example #19
0
class ExportCSV(ExportData):
    """Export a spider's items to exports/csv/<spider name>.csv."""

    def spider_opened(self, spider):
        """Open the spider's CSV file, remember it in ``self.files`` and start exporting."""
        csv_file = open('exports/csv/%s.csv' % spider.name, 'w+b')
        self.files[spider] = csv_file
        exporter = CsvItemExporter(csv_file)
        self.exporter = exporter
        exporter.start_exporting()
Example #20
0
 def spider_opened(self, spider):
     """Open '<spider name>_result.csv' and start exporting the product columns."""
     handle = open('%s_result.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     columns = [
         'title', 'brand', 'description', 'price', 'main_image_url',
         'additional_image_urls', 'sku', 'category',
     ]
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #21
0
class ExportCSV(ExportData):
    """Write the items of each spider to exports/csv/<spider name>.csv."""

    def spider_opened(self, spider):
        """Create the spider's CSV file, register it in ``self.files`` and begin the export."""
        target = 'exports/csv/%s.csv' % spider.name
        out = open(target, 'w+b')
        self.files[spider] = out
        self.exporter = CsvItemExporter(out)
        self.exporter.start_exporting()
Example #22
0
class WriteToCsv(object):
    """Pipeline that appends every item as one header-less CSV row.

    The target path and the column list come from ``settings.csv_file_path``
    and ``settings.csv_export_fields``.
    """

    @classmethod
    def process_item(self, item, spider):
        """Append *item* to the CSV file and return it.

        This is a classmethod, so ``self`` is the class and ``file`` /
        ``exporter`` are stored as class attributes.
        """
        self.file = open(settings.csv_file_path, 'ab+')
        try:
            self.exporter = CsvItemExporter(self.file, include_headers_line=False)
            self.exporter.fields_to_export = settings.csv_export_fields
            self.exporter.export_item(item)
        finally:
            # A fresh handle is opened for every item; close it right away
            # so a long crawl does not leak one descriptor per item.
            self.file.close()
        return item
Example #23
0
 def spider_opened(self, spider):
     """Open 'data.csv' and start exporting the product columns."""
     columns = [
         'product_asin', 'product_name', 'product_is_have_patten',
         'product_description', 'image_link', 'original_image', 'color',
         'patten', 'price', 'imported_code',
     ]
     self.file = open('data.csv', 'w+b')
     exporter = CsvItemExporter(self.file)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #24
0
 def spider_opened(self, spider):
     """Open 'webScrape.csv' (text mode) and start exporting the match columns."""
     handle = open('webScrape.csv', 'w')
     self.files[spider] = handle
     columns = [
         'match', 'wcRound', 'group', 'date', 'year', 'venue', 'hometeam',
         'results', 'awayteam',
     ]
     # The second positional argument is passed through unchanged as True.
     exporter = CsvItemExporter(handle, True)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #25
0
    def __init__(self):
        """Open the key-statistics and summary CSV files, start their exporters
        and subscribe ``spider_closed`` to the matching signal."""
        self.keystat_file = open('yahoo_keystat.csv', 'w+b')
        self.summary_file = open('yahoo_summary.csv', 'w+b')

        self.summary_exporter = CsvItemExporter(self.summary_file)
        self.keystat_exporter = CsvItemExporter(self.keystat_file)
        for exporter in (self.summary_exporter, self.keystat_exporter):
            exporter.start_exporting()

        dispatcher.connect(self.spider_closed, signals.spider_closed)
Example #26
0
 def __init__(self, settings):
     """Open the CSV named by ``settings['My_Exporter_URI']`` in append mode.

     The header row is enabled only when the file does not exist yet, so
     later runs append rows under the header written by the first run.
     """
     self.filename = settings['My_Exporter_URI']
     # Must be decided before open(): mode 'ab' creates a missing file.
     write_header = not os.path.isfile(self.filename)
     self.fileCsv = open(self.filename, 'ab')
     self.exporter = CsvItemExporter(self.fileCsv,
                                     include_headers_line=write_header)
    def spider_opened(self, spider):
        """Open '<spider name>.csv' and start exporting the columns in ``self.headers``."""
        self.file = open('%s.csv' % (spider.name), 'w+b')
        exporter = CsvItemExporter(self.file)
        exporter.fields_to_export = self.headers
        self.exporter = exporter
        exporter.start_exporting()
Example #28
0
 def spider_opened(self, spider):
     """Open '<spider name>_items.csv' and start exporting the calendar-event columns."""
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     columns = [
         'Subject', 'Start_Date', 'Start_Time', 'End_Date', 'End_Time',
         'Location', 'All_Day_Event',
     ]
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #29
0
 def spider_opened(self, spider):
     """Open '<spider name>_items.csv' and start exporting the stock-quote columns."""
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     columns = [
         'Company Name', 'Current Price', 'Previous Close', 'Day\'s Range',
         'Historical Volatility', 'Market Cap', 'Shares Outstanding', 'EPS',
         'P/E Ratio', 'Beta (Volatility)', 'Percent Held by Institutions',
     ]
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
class AntigooglewebPipeline(object):
    """Pipeline that opens 'crawloutput.csv' in append mode when a spider opens.

    ``process_item`` passes items through unchanged.
    """

    def spider_opened(self, spider):
        """Open the output file in binary append mode and start a CSV exporter on it."""
        # A leftover ``print(file)`` was removed: this block defines no
        # ``file`` name, so on Python 3 the call raised NameError before
        # the file was ever opened.
        print('hh')
        self.file = open('crawloutput.csv', 'ab')  # 'ab' appends; 'w+b' would truncate
        self.exporter = CsvItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        """Return *item* untouched."""
        return item
Example #31
0
 def spider_opened(self, spider):
     """Open '<spider name>_items.csv' and start exporting the course-material columns."""
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     columns = [
         'section', 'section_url', 'course', 'course_url',
         'material_section', 'material_section_url', 'material',
         'material_url',
     ]
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #32
0
    def spider_opened(self, spider):
        """Open ``self.filename`` for appending and start the CSV export.

        When the file already exists its posts are loaded first via
        ``self.load_existing_posts`` and no header row is written.
        """
        already_there = os.path.isfile(self.filename)
        if already_there:
            self.load_existing_posts(self.filename)

        handle = open(self.filename, 'a+b')
        self.files[spider] = handle
        exporter = CsvItemExporter(handle, include_headers_line=not already_there)
        exporter.fields_to_export = ['title', 'post_date', 'price', 'city', 'url']
        self.exporter = exporter
        exporter.start_exporting()
Example #33
0
    def __init__(self):
        """Open today's '<YYYY-MM-DD>.csv' and start a GBK-encoded CSV export.

        An existing file is appended to ("ab+"); otherwise a new one is
        created ("wb+"). The header row is written only when the file is new
        or still empty, so repeated runs on the same day no longer insert a
        second header in the middle of the data.
        """
        data_file = "%s.csv" % (datetime.datetime.now().strftime("%Y-%m-%d"))
        appending = os.path.exists(data_file)
        needs_header = not appending or os.path.getsize(data_file) == 0

        self.file = open(data_file, "ab+" if appending else "wb+")
        self.exporter = CsvItemExporter(self.file,
                                        include_headers_line=needs_header,
                                        encoding="gbk")

        self.exporter.start_exporting()
Example #34
0
 def spider_opened(self, spider):
     """Open '<spider name>_<YYYYMMDD>.csv' and start exporting url/year/filename."""
     day_stamp = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d')
     handle = open('%s_%s.csv' % (spider.name, day_stamp), 'w+b')
     self.files[spider] = handle
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = ['url', 'year', 'filename']
     self.exporter = exporter
     exporter.start_exporting()
Example #35
0
 def spider_opened(self, spider):
     """Open '<spider name>_items.csv' and start exporting the property-listing columns."""
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     columns = [
         'id', 'data', 'tipo', 'preco', 'area_util', 'area_construida',
         'n_quartos', 'vagas_garagem', 'titulo', 'bairro', 'municipio',
         'cep', 'descricao', 'url',
     ]
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
    def __init__(self):
        """Create one CSV exporter per item type, each writing to its own file."""
        self.files = {}
        # Columns are taken from the fields declared on each item class.
        rating_columns = ProfRatingItem.fields.keys()
        self.exporter1 = CsvItemExporter(file=open("profRating.csv",'wb'),
                                         fields_to_export=rating_columns)
        summary_columns = ProfSummaryItem.fields.keys()
        self.exporter2 = CsvItemExporter(file=open("profSummary.csv",'wb'),
                                         fields_to_export=summary_columns)

	@classmethod
	def from_crawler(cls, crawler):
		"""Build the pipeline and hook its open/close handlers to the crawler's signals."""
		instance = cls()
		for event in ('spider_opened', 'spider_closed'):
			# The handler and the signal share the same name.
			crawler.signals.connect(getattr(instance, event), getattr(signals, event))
		return instance
Example #37
0
 def _exporter(self, item, spider):
     """Return the CSV exporter for *item*'s class under *spider*, creating it on first use.

     Exporters are cached in ``self.files[spider.name][<lower-cased class
     name>]`` as ``(exporter, file)`` pairs; a new one writes to
     '<spider name>_<class name>.csv'.
     """
     per_spider = self.files.setdefault(spider.name, {})
     kind = item.__class__.__name__.lower()
     if kind in per_spider:
         exporter, _handle = per_spider[kind]
         return exporter

     handle = open('%s_%s.csv' % (spider.name, kind), 'w+b')
     exporter = CsvItemExporter(handle)
     exporter.start_exporting()
     per_spider[kind] = (exporter, handle)
     return exporter
Example #38
0
	def spider_opened(self, spider):
		"""Open the listing CSV for the spider and start exporting.

		The 'realestate' spider writes to 'current_listing.csv'; every other
		spider writes to 'past_listing.csv'.
		"""
		# Compare for equality. The previous ``spider.name in 'realestate'``
		# was a substring test, so names such as 'real', 'estate' or ''
		# were also routed to the current-listing file.
		if spider.name == 'realestate':
			self.file = open('current_listing.csv', 'w+b')
		else:
			self.file = open('past_listing.csv', 'w+b')
		self.exporter = CsvItemExporter(self.file)
		self.exporter.start_exporting()
Example #39
0
 def spider_opened(self, spider):
     """Open '<spider name>_<spider category>_items.csv' and start exporting three columns."""
     out_name = '%s_%s_items.csv' % (spider.name, spider.category)
     handle = open(out_name, 'w+b')
     self.files[spider] = handle
     exporter = CsvItemExporter(handle)
     # Disabled alternative: ['default']
     exporter.fields_to_export = ['vendor', 'product', 'default']
     self.exporter = exporter
     exporter.start_exporting()
Example #40
0
	def spider_opened(self, spider):
		"""Open '<spider name>_items.csv' and start a header-less export of the listing columns."""
		handle = open('%s_items.csv' % spider.name, 'w+b')
		self.files[spider] = handle
		columns = ["url","status","date","mls","address","price","beds","baths","homesize","lotsize","description","images"]
		exporter = CsvItemExporter(handle)
		self.exporter = exporter
		exporter.include_headers_line = False
		exporter.fields_to_export = columns
		exporter.start_exporting()
 def spider_opened(self, spider):
     """Open '<spider name>_items.csv' and start exporting the film columns."""
     columns = ['id','title', 'time', 'director', 'year', 'star','cost']
     handle = open('%s_items.csv' % spider.name, 'w+b')
     self.files[spider] = handle
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #42
0
 def spider_opened(self, spider):
     """Open '/tmp/<spider name>_log.csv' and start a header-less CSV export."""
     file = open('/tmp/%s_log.csv' % spider.name, 'w+b')
     self.files[spider] = file
     self.exporter = CsvItemExporter(file)
     self.exporter.fields_to_export = ['_id', 'name', 'count', 'free', 'timestamp', 'lat', 'lon', 'url']
     # Must be the boolean False: the previous value, the string 'false',
     # is truthy and therefore left the header row switched on.
     self.exporter.include_headers_line = False
     self.exporter.start_exporting()
Example #43
0
 def spider_opened(self, spider):
   """Open 'mediabase.csv' and start exporting the place/company columns."""
   handle = open("mediabase.csv", 'w+b')
   self.files[spider] = handle
   exporter = CsvItemExporter(handle)
   # Disabled alternative column set:
   #["Name","Address","City","Neighborhood","State","Zip","Phone","Website","Image_url","Hours_Mon","Hours_Tue","Hours_Wed","Hours_Thu","Hours_Fri","Hours_Sat","Hours_Sun","Price","TakesReservation","Delivery","TakeOut","AcceptsCreditCards","GoodFor","Parking","WheelChairAccessible","BikeParking","GoodForKids","GoodForGroups","Attire","Ambience","NoiseLevel","Alcohol","OutDoorSeating","Wifi","HasTV","WaiterService","Caters","Url"]
   exporter.fields_to_export = ["Type","Area","PlaceName","Web","Tel","Address","Zip","Town","Hours","CompanyName","OrganizationNo","Turnover","Employed","LastName","FirstName","Telephone","AllabolagUrl","EniroUrl"]
   self.exporter = exporter
   exporter.start_exporting()
 def spider_opened(self, spider):
     """Open '<spider name>_FL_Orlando_20150716.csv' and start exporting the listing columns."""
     columns = [
         "state", "city", "neighborhood", "zip_code",
         "listing_type", "property_type", "construction", "lot",
         "mls_number", "parcel",
         "price", "zestimate", "zestimate_rent",
         "built_in", "bedrooms", "baths",
         "address", "description", "listing_provided_by",
         "url", "timestamp",
     ]
     handle = open("%s_FL_Orlando_20150716.csv" % spider.name, "w+b")
     self.files[spider] = handle
     exporter = CsvItemExporter(handle)
     exporter.fields_to_export = columns
     self.exporter = exporter
     exporter.start_exporting()
Example #45
0
	def open_spider(self,spider):
		"""Create 'wechat.csv' with a UTF-8 byte-order mark and start exporting five columns."""
		self.file=open('wechat.csv','w+b')
		# The file is opened in binary mode, so the BOM has to be a bytes
		# literal; writing a text string here raises TypeError on Python 3.
		# The bytes written are the same on Python 2.
		self.file.write(b'\xEF\xBB\xBF')
		self.exporter=CsvItemExporter(self.file)
		self.exporter.fields_to_export = ['page','wID','wtitle','wsub','Link']
		# self.exporter.fields_to_export = ['jobTitle','salary','education','ex','age','num','contact','location','updatetime','url','Requirement','comInfo','delivery_num']
		self.exporter.start_exporting()
Example #46
0
 def spider_opened(self, spider):
     """Open '<spider name>_<unix time>.csv' and start an 'excel'-dialect export of the price columns."""
     out_name = '%s_%s.csv' % (spider.name, int(time.time()))
     handle = open(out_name, 'w+b')
     self.files[spider] = handle
     # Disabled alternative:
     #CsvItemExporter(file,fields_to_export = ['pid','price','curr','date','source','title','heading','url','sku','in_stock','image'],dialect='excel',delimiter=';')
     columns = ['product_id', 'price', 'price_usd', 'currency', 'when_created',
                'source', 'title', 'heading', 'url', 'in_stock',
                'image']
     self.exporter = CsvItemExporter(handle, fields_to_export=columns, dialect='excel')
     self.exporter.start_exporting()
Example #47
0
	def spider_opened(self, spider):
		"""Open the spider's output CSV (text mode) and start exporting.

		``MatchSpider`` instances write to '<name>_asia_<match_date>.csv';
		all other spiders write to '<name>_output.csv'.
		"""
		if isinstance(spider, MatchSpider):
			out_name = '%s_asia_%s.csv' % (spider.name,spider.match_date)
		else:
			out_name = '%s_output.csv' % spider.name
		handle = open(out_name, 'w')
		self.files[spider] = handle
		exporter = CsvItemExporter(handle)
		self.exporter = exporter
		exporter.start_exporting()
Example #48
0
  def spider_opened(self, spider):
    """Open '<spider name>_items.csv' and start exporting the title and body columns."""
    handle = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = handle
    exporter = CsvItemExporter(handle)
    exporter.fields_to_export = ['title', 'body']
    self.exporter = exporter
    exporter.start_exporting()
Example #49
0
 def assertExportResult(self, item, expected, **kwargs):
     """Export *item* through a CsvItemExporter built with **kwargs and
     check the bytes produced against *expected*."""
     buffer = BytesIO()
     exporter = CsvItemExporter(buffer, **kwargs)
     exporter.start_exporting()
     exporter.export_item(item)
     exporter.finish_exporting()
     produced = buffer.getvalue()
     self.assertCsvEqual(produced, expected)
Example #50
0
class JnePipeline(object):
    """Route each item to the CSV file that matches its item class."""

    candidato_filename = './output/candidato.csv'
    experiencia_laboral_filename = './output/experiencia_laboral.csv'

    def __init__(self):
        """Open both output files for binary writing and attach a CSV exporter to each."""
        candidato_out = open(self.candidato_filename, 'wb')
        self.candidato_file = candidato_out
        self.candidato_exporter = CsvItemExporter(candidato_out)

        experiencia_out = open(self.experiencia_laboral_filename, 'wb')
        self.experiencia_laboral_file = experiencia_out
        self.experiencia_laboral_exporter = CsvItemExporter(experiencia_out)

    def process_item(self, item, spider):
        """Export *item* with every exporter whose item class it is an instance of, then return it."""
        routes = (
            (CandidatoItem, self.candidato_exporter),
            (ExperienciaLaboralItem, self.experiencia_laboral_exporter),
        )
        for item_class, exporter in routes:
            if isinstance(item, item_class):
                exporter.export_item(item)
        return item
Example #51
0
 def spider_opened(self, spider):
     """Append to the CSV named after the spider's six ``pd*`` attributes.

     Rows are ';'-separated, fully quoted and written without a header line.
     """
     out_name = "{}_{}_{}__{}_{}_{}.csv".format(
         spider.pdda, spider.pddm, spider.pddj,
         spider.pdfa, spider.pdfm, spider.pdfj)
     handle = open(out_name, 'a+b')
     self.files[spider] = handle
     exporter = CsvItemExporter(handle, include_headers_line=False,
                                delimiter=';', quoting=csv.QUOTE_ALL)
     exporter.fields_to_export = ["name", "address", "zipcode", "city", "number", "date"]
     self.exporter = exporter
     exporter.start_exporting()
Example #52
0
    def __init__(self):
        """Create the CSV exporter writing to 'mobiles.csv'."""
        self.files = {}
        # Columns are taken from the fields declared on the item class.
        columns = BillionPricesIndiaItem.fields.keys()
        self.exporter1 = CsvItemExporter(file=open("mobiles.csv",'wb'),
                                         fields_to_export=columns)

	@classmethod
	def from_crawler(cls, crawler):
		"""Instantiate the pipeline and subscribe it to the spider open/close signals."""
		instance = cls()
		for event in ('spider_opened', 'spider_closed'):
			# Handler methods are named after the signals they handle.
			crawler.signals.connect(getattr(instance, event), getattr(signals, event))
		return instance
Example #53
0
 def test_header_export_two_items(self):
     """Exporting the same item twice yields one header row followed by two data rows."""
     expected = 'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n'
     for item in [self.i, dict(self.i)]:
         buffer = BytesIO()
         exporter = CsvItemExporter(buffer)
         exporter.start_exporting()
         for _ in range(2):
             exporter.export_item(item)
         exporter.finish_exporting()
         self.assertCsvEqual(buffer.getvalue(), expected)
Example #54
0
    def spider_opened(self, spider):
        """Open ``spider.output_file`` and start exporting the EXPORT_FIELDS columns.

        If the file cannot be created the spider is asked to close, with the
        error text as the reason, and nothing else is set up.
        """
        try:
            out = open(spider.output_file, 'w+b')
        except IOError as err:
            reason = "ERROR: Can't create CSV file: " + str(err)
            spider.crawler.engine.close_spider(spider, reason)
            return

        self.files[spider] = out
        exporter = CsvItemExporter(out)
        exporter.fields_to_export = settings.getlist("EXPORT_FIELDS")
        self.exporter = exporter
        exporter.start_exporting()
Example #55
0
class PointacrePipeline(object):
    """Pipeline for the '1point3acres.user' and '1point3acres.login' spiders.

    User items are written to ``users.csv`` once per uid; login items are
    written to ``passwd.csv``. Items from other spiders pass through.
    """
    # uids already exported; class-level, so shared by all instances.
    users={}

    def __init__(self):
        self.user_file=open('users.csv','wb')
        self.user_exporter=CsvItemExporter(self.user_file)
        self.psw_file=open('passwd.csv','wb')
        self.psw_exporter=CsvItemExporter(self.psw_file)

    def process_item(self, item, spider):
        """Export *item* according to the spider and return it.

        Raises:
            DropItem: for a user item whose uid has already been exported.
        """
        if spider.name=="1point3acres.user":
            # ``in`` instead of ``has_key``, which only exists on Python 2.
            if 'uid' in item and item['uid']:
                uid=item['uid'][0]
                if uid in PointacrePipeline.users:
                    raise DropItem()
                PointacrePipeline.users[uid]=True
                self.user_exporter.export_item(item)
        elif spider.name=="1point3acres.login":
            self.psw_exporter.export_item(item)
        # Always hand the item on. The handled branches used to fall
        # through and return None, which later pipeline stages would then
        # receive instead of the item.
        return item
Example #56
0
class ParkingCrawlerPipeline(object):
    """Write every item of a spider to '/tmp/<spider name>_log.csv' without a header row."""

    def __init__(self):
        # Open output files, keyed by spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and connect its open/close handlers to the crawler's signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the spider's CSV file and start a header-less export."""
        file = open('/tmp/%s_log.csv' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.fields_to_export = ['_id', 'name', 'count', 'free', 'timestamp', 'lat', 'lon', 'url']
        # Must be the boolean False: the previous value, the string 'false',
        # is truthy and therefore left the header row switched on.
        self.exporter.include_headers_line = False
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the spider's file."""
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    # An earlier pass-through ``process_item`` defined above this one was
    # dead code (this later definition always replaced it) and was removed.
    def process_item(self, item, spider):
        """Export *item* and hand it on unchanged."""
        self.exporter.export_item(item)
        return item
Example #57
0
class CVSExport(object):
    """Export every scraped item to 'postUGR_items.csv'."""

    def __init__(self):
        # Open output files, keyed by spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        instance = cls()
        for event in ('spider_opened', 'spider_closed'):
            crawler.signals.connect(getattr(instance, event), getattr(signals, event))
        return instance

    def spider_opened(self, spider):
        """Open the output file, remember it for *spider* and start the export."""
        handle = open('postUGR_items.csv', 'w+b')
        self.files[spider] = handle
        self.exporter = CsvItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file that belongs to *spider*."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item
Example #58
0
class CsvExportPipeline(object):
    """Export the 'description' and 'phone' fields of every item to a
    per-spider, timestamped CSV file."""

    def __init__(self):
        """Subscribe the open/close handlers, then create the file registry."""
        for handler, signal in ((self.spider_opened, signals.spider_opened),
                                (self.spider_closed, signals.spider_closed)):
            dispatcher.connect(handler, signal)
        self.files = {}

    def spider_opened(self, spider):
        """Open '<spider name>_<unix time>.csv' and start the export."""
        out_name = "%s_%s.csv" % (spider.name, int(time.time()))
        handle = open(out_name, "w+b")
        self.files[spider] = handle
        self.exporter = CsvItemExporter(handle, fields_to_export=["description", "phone"])
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file that belongs to *spider*."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and return it; a ``None`` item is dropped.

        (A per-spider duplicate filter keyed on ``item['description']`` was
        sketched here in commented-out code and is not active.)
        """
        if item is None:
            raise DropItem("None")
        self.exporter.export_item(item)
        return item
Example #59
0
class CSVPipeline(object):
    """Append every item to a ';'-separated, fully quoted CSV file named
    after the spider's six ``pd*`` attributes."""

    def __init__(self):
        # Open output files, keyed by spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and subscribe it to the spider open/close signals."""
        instance = cls()
        for event in ('spider_opened', 'spider_closed'):
            crawler.signals.connect(getattr(instance, event), getattr(signals, event))
        return instance

    def spider_opened(self, spider):
        """Open the spider's file in append mode and start a header-less export."""
        out_name = "{}_{}_{}__{}_{}_{}.csv".format(
            spider.pdda, spider.pddm, spider.pddj,
            spider.pdfa, spider.pdfm, spider.pdfj)
        handle = open(out_name, 'a+b')
        self.files[spider] = handle
        exporter = CsvItemExporter(handle, include_headers_line=False,
                                   delimiter=';', quoting=csv.QUOTE_ALL)
        exporter.fields_to_export = ["name", "address", "zipcode", "city", "number", "date"]
        self.exporter = exporter
        exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file that belongs to *spider*."""
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item