def open_spider(self, spider):
    """Start a CSV export of branch transactions when fto_branch opens."""
    if spider.name != 'fto_branch':
        return
    self.file = open('./output/transactions_alt.csv', 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.start_exporting()
def spider_closed(self, spider):
    """Write every ordered item whose name also appears in self.result to
    res.csv (columns: name, image, link, address).

    Best-effort by design: an export failure must not crash shutdown, but
    BUG FIX: the original used a bare ``except:`` (which also swallows
    SystemExit/KeyboardInterrupt) and leaked the file handle on error; a
    context manager now guarantees the file is closed.
    """
    try:
        with open('res.csv', 'w+b') as out:
            self.exporter = CsvItemExporter(out)
            self.exporter.fields_to_export = [
                'name', 'image', 'link', 'address'
            ]
            self.exporter.start_exporting()
            for item in self.ordered:
                for res in self.result:
                    if item['name'] == res['name']:
                        self.exporter.export_item(item)
            self.exporter.finish_exporting()
    except Exception:
        # deliberate best-effort: keep the original swallow, but narrowed
        pass
def open_spider(self, spider):
    """Open a stage-named CSV exporter when the fto_urls spider starts."""
    if spider.name != 'fto_urls':
        return
    path = './output/{}.csv'.format(spider.stage)
    self.file = open(path, 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open a date-stamped CSV file and export the deal fields."""
    today = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d')
    out = open('%s_%s.csv' % (spider.name, today), 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = ['title', 'save', 'desc', 'long_desc', 'image']
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Route each MicrocapCollector spider to its own data file."""
    paths = {
        'mcc1': 'MicrocapCollector/spiders/data/data1.csv',
        'mcc2': 'MicrocapCollector/spiders/data/data2.csv',
    }
    if spider.name in paths:
        self.file = open(paths[spider.name], 'w+b')
    self.exporter = CsvItemExporter(self.file, delimiter=',')
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Set up per-spider polling export state.

    Opens data/<name>_latest.csv for this run's items and loads the
    previous run's items from data/<name>_dict.json so new items can be
    diffed against them later (see self.newitems).
    """
    latest_polls_file = open('data/' + spider.name + '_latest.csv', 'w')
    self.latest_polls_files[spider] = latest_polls_file
    exporter = CsvItemExporter(latest_polls_file, fields_to_export=spider.fields_to_export)
    exporter.start_exporting()
    self.exporters[spider] = exporter
    prev_polls_fName = 'data/' + spider.name + '_dict.json'
    try:
        prev_polls_file = open(prev_polls_fName, 'r')
        prev_polls = json.load(prev_polls_file)
        prev_polls_file.close()
    except (IOError):
        # data/dict.json doesn't exist yet — first run, start empty
        prev_polls = []
    except ValueError:
        # dict.json is malformed, should be inspected before being overwritten
        raise ValueError("Malformed prev_polls_file for " + spider.name + ".")
    self.prev_polls_fNames[spider] = prev_polls_fName
    self.prev_polls[spider] = prev_polls
    self.newitems[spider] = []
class CsdnPipeline(object):
    """Export scraped CSDN users to CSV and persist recovered credentials.

    BUG FIX: the original ``log.msg("username:"******"\\tpassword:"...)``
    line was a syntax error (redacted concatenation); restored as explicit
    string concatenation. Also, the first user item was silently dropped
    because export only happened on the ``else`` branch after the file was
    lazily created — now every item is exported.
    """

    users = {}

    def __init__(self):
        self.file = None
        self.exporter = None

    def set_file(self, filename):
        # Lazily opened on the first user item.
        self.file = open(filename, 'wb')
        self.exporter = CsvItemExporter(self.file)

    def process_item(self, item, spider):
        if spider.name == "csdn.user":
            if self.file is None:
                self.set_file("export_users.csv")
            self.exporter.export_item(item)
        if spider.name == "csdn.login":
            if item['username']:
                sql = 'update t_csdn_users set real_password=:password,real_email=:email where username=:username'
                username = item['username']
                password = item['password']
                email = item['email']
                param = {'username': username, 'password': password, 'email': email}
                spider.oracle_db.execute_sql(sql, param, False)
                log.msg("username:" + username + "\tpassword:" + password, level=log.INFO)
        return item
def spider_opened(self, spider):
    """Open <name>_items.csv and export the movie columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    # FIX: renamed local `list`, which shadowed the builtin
    fields = ['id', 'title', 'time', 'director', 'year', 'star', 'cost']
    self.exporter.fields_to_export = fields
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>.csv and pick the column set for the spider kind."""
    out = open('%s.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    fields_by_spider = {
        "all-coins": [
            'rank', 'name', 'type', 'symbol', 'website',
            'market_cap_usd', 'price_usd', 'price_btc',
            'volume_24_usd', 'change_24',
        ],
        "historical-data": [
            'date', 'open_price', 'high_price', 'low_price',
            'close_price', 'volume', 'market_cap',
        ],
    }
    if spider.name in fields_by_spider:
        self.exporter.fields_to_export = fields_by_spider[spider.name]
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open the spider's CSV file and start exporting the configured fields."""
    out = open(self.get_file_name(spider, "csv"), 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out, fields_to_export=self.fields_to_export)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open douban_tv_hanju.csv and start the CSV export."""
    out = open('douban_tv_hanju.csv', 'wb+')
    self.files[spider] = out
    print('==========pipeline==========spider_opened==========')
    self.exporter = CsvItemExporter(out)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open Pets_LasVegas.csv and export the business-listing columns."""
    out = open('Pets_LasVegas.csv', 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        'business_name', 'industry_category', 'city', 'state',
        'phone_number', 'street_address', 'website', 'email',
        'url', 'count',
    ]
    self.exporter.start_exporting()
def spider_open(self, spider):
    """Create the carepack and model CSV exporters.

    BUG FIX: CsvItemExporter writes bytes, so the files must be opened in
    binary mode (text mode raises TypeError on Python 3). The handles are
    also kept on self so they can be closed when the spider finishes —
    the original discarded them and leaked both files.
    """
    self.carepack_file = open("carepack.csv", "wb")
    self.model_file = open("model.csv", "wb")
    self.csv_exporter_item = CsvItemExporter(self.carepack_file, quoting=csv.QUOTE_ALL)
    self.csv_exporter_detail = CsvItemExporter(self.model_file, quoting=csv.QUOTE_ALL)
    self.csv_exporter_item.start_exporting()
    self.csv_exporter_detail.start_exporting()
def open_spider(self, spider):
    """Start the material CSV export when the fto_material spider opens."""
    if spider.name != 'fto_material':
        return
    self.file = open('./output/fto_material.csv', 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_items.csv and export the category-tree columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = ['id', 'name', 'link', 'index', 'parent_id']
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_items.csv and export the deal-listing columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        'title', 'link', 'location',
        'original_price', 'price', 'end_date',
    ]
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open a timestamped CSV and choose the column set by spider family."""
    out = open('%s_%s.csv' % (spider.name, int(time.time())), 'w+b')
    self.files[spider] = out
    if 'yopt' in spider.name:
        columns = ['date', 'instrument', 'option_symbol', 'symbol',
                   'expiration', 'type', 'strike', 'last', 'change',
                   'bid', 'ask', 'volume', 'open_int']
        self.exporter = CsvItemExporter(out, fields_to_export=columns, dialect='excel')
    elif 'prices' in spider.name:
        columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'adj_close']
        self.exporter = CsvItemExporter(out, fields_to_export=columns, dialect='excel')
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open the spider-configured output file (or result_<name>.csv) and
    export the spider-configured fields (all fields when unset)."""
    default_name = 'result_{}.format'.replace('format', 'csv')  # placeholder removed below
    out = open(getattr(spider, 'output_filename',
                       'result_{}.csv'.format(spider.name)), 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = getattr(spider, 'output_fields', None)
    self.exporter.start_exporting()
class ExportCSV(ExportData):
    """Exporting to export/csv/spider-name.csv file."""

    def spider_opened(self, spider):
        """Open exports/csv/<name>.csv and start the CSV export."""
        out = open('exports/csv/%s.csv' % spider.name, 'w+b')
        self.files[spider] = out
        self.exporter = CsvItemExporter(out)
        self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_result.csv and export the product-catalog columns."""
    out = open('%s_result.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        'title', 'brand', 'description', 'price',
        'main_image_url', 'additional_image_urls', 'sku', 'category',
    ]
    self.exporter.start_exporting()
class WriteToCsv(object):
    """Append each scraped item to the CSV file configured in settings.

    BUG FIX: the original opened the file on every item and never closed
    it, leaking one file handle per item; a context manager now closes it.
    """

    @classmethod
    def process_item(cls, item, spider):
        # Append mode, no header: the file accumulates rows across runs.
        with open(settings.csv_file_path, 'ab+') as csv_file:
            exporter = CsvItemExporter(csv_file, include_headers_line=False)
            exporter.fields_to_export = settings.csv_export_fields
            exporter.export_item(item)
        return item
def spider_opened(self, spider):
    """Open data.csv and export the product columns."""
    self.file = open('data.csv', 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.fields_to_export = [
        'product_asin', 'product_name', 'product_is_have_patten',
        'product_description', 'image_link', 'original_image',
        'color', 'patten', 'price', 'imported_code',
    ]
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open webScrape.csv and export the match-result columns.

    BUG FIX: CsvItemExporter writes bytes, so the file must be opened in
    binary mode ('w' raises TypeError on Python 3). The opaque positional
    ``True`` (the exporter's include_headers_line flag) is now explicit.
    """
    out = open('webScrape.csv', 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out, include_headers_line=True)
    self.exporter.fields_to_export = [
        'match', 'wcRound', 'group', 'date', 'year',
        'venue', 'hometeam', 'results', 'awayteam',
    ]
    self.exporter.start_exporting()
def __init__(self):
    """Open both Yahoo output files, start their exporters, and register
    the spider_closed signal handler."""
    self.keystat_file = open('yahoo_keystat.csv', 'w+b')
    self.summary_file = open('yahoo_summary.csv', 'w+b')
    self.summary_exporter = CsvItemExporter(self.summary_file)
    self.keystat_exporter = CsvItemExporter(self.keystat_file)
    for exporter in (self.summary_exporter, self.keystat_exporter):
        exporter.start_exporting()
    dispatcher.connect(self.spider_closed, signals.spider_closed)
def __init__(self, settings):
    """Append-mode CSV exporter for My_Exporter_URI.

    The header row is written only when the target file does not exist
    yet; when appending to an existing file it is suppressed.
    """
    self.filename = settings['My_Exporter_URI']
    write_header = not os.path.isfile(self.filename)
    self.fileCsv = open(self.filename, 'ab')
    self.exporter = CsvItemExporter(self.fileCsv,
                                    include_headers_line=write_header)
def spider_opened(self, spider):
    """Open <name>.csv and export the columns declared in self.headers."""
    self.file = open('%s.csv' % spider.name, 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.fields_to_export = self.headers
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_items.csv and export calendar-event columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        'Subject', 'Start_Date', 'Start_Time', 'End_Date',
        'End_Time', 'Location', 'All_Day_Event',
    ]
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_items.csv and export the stock-quote columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        'Company Name', 'Current Price', 'Previous Close',
        "Day's Range", 'Historical Volatility', 'Market Cap',
        'Shares Outstanding', 'EPS', 'P/E Ratio',
        'Beta (Volatility)', 'Percent Held by Institutions',
    ]
    self.exporter.start_exporting()
class AntigooglewebPipeline(object):
    """Append crawl output to crawloutput.csv.

    BUG FIX: removed the stray debug lines ``print(file)`` / ``print('hh')``
    — ``file`` is undefined at that point on Python 3 and raised NameError.
    """

    def spider_opened(self, spider):
        # 'ab' appends across runs; switch to 'w+b' to truncate instead.
        self.file = open('crawloutput.csv', 'ab')
        self.exporter = CsvItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        return item
def spider_opened(self, spider):
    """Open <name>_items.csv and export the course-material columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        'section', 'section_url', 'course', 'course_url',
        'material_section', 'material_section_url',
        'material', 'material_url',
    ]
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Append to self.filename; write the header only for a brand-new
    file, and preload already-exported posts for dedup otherwise."""
    is_new_file = not os.path.isfile(self.filename)
    if not is_new_file:
        self.load_existing_posts(self.filename)
    out = open(self.filename, 'a+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out, include_headers_line=is_new_file)
    self.exporter.fields_to_export = ['title', 'post_date', 'price', 'city', 'url']
    self.exporter.start_exporting()
def __init__(self):
    """Open today's CSV file (YYYY-MM-DD.csv) and start the GBK export.

    BUG FIX: the original duplicated the exporter construction in both
    branches and always passed include_headers_line=True, so appending to
    an existing file wrote a second header row mid-file. The header is now
    written only when the file is created fresh.
    """
    data_file = "%s.csv" % (datetime.datetime.now().strftime("%Y-%m-%d"))
    exists = os.path.exists(data_file)
    self.file = open(data_file, "ab+" if exists else "wb+")
    self.exporter = CsvItemExporter(self.file,
                                    include_headers_line=not exists,
                                    encoding="gbk")
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open a date-stamped CSV and export url/year/filename columns."""
    stamp = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d')
    out = open('%s_%s.csv' % (spider.name, stamp), 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = ['url', 'year', 'filename']
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_items.csv and export the real-estate listing columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        'id', 'data', 'tipo', 'preco', 'area_util', 'area_construida',
        'n_quartos', 'vagas_garagem', 'titulo', 'bairro', 'municipio',
        'cep', 'descricao', 'url',
    ]
    self.exporter.start_exporting()
def __init__(self):
    """Create one CSV exporter per item type (ratings and summaries)."""
    self.files = {}
    rating_file = open("profRating.csv", 'wb')
    summary_file = open("profSummary.csv", 'wb')
    self.exporter1 = CsvItemExporter(rating_file,
                                     fields_to_export=ProfRatingItem.fields.keys())
    self.exporter2 = CsvItemExporter(summary_file,
                                     fields_to_export=ProfSummaryItem.fields.keys())

@classmethod
def from_crawler(cls, crawler):
    """Build the pipeline and hook it into the spider lifecycle signals."""
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def _exporter(self, item, spider):
    """Return the CSV exporter for (spider, item class), creating it and
    its backing file on first use."""
    per_spider = self.files.setdefault(spider.name, {})
    kind = item.__class__.__name__.lower()
    try:
        exporter, f = per_spider[kind]
    except KeyError:
        f = open('%s_%s.csv' % (spider.name, kind), 'w+b')
        exporter = CsvItemExporter(f)
        exporter.start_exporting()
        per_spider[kind] = (exporter, f)
    return exporter
def spider_opened(self, spider):
    """Open the listing file matching the spider and start exporting.

    BUG FIX: the original tested ``spider.name in 'realestate'`` — a
    substring check that also matched names like 'real' or 'estate' (and
    failed for the intended multi-word names); use equality.
    """
    if spider.name == 'realestate':
        self.file = open('current_listing.csv', 'w+b')
    else:
        self.file = open('past_listing.csv', 'w+b')
    self.exporter = CsvItemExporter(self.file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_<category>_items.csv and export vendor/product columns."""
    out = open('%s_%s_items.csv' % (spider.name, spider.category), 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = ['vendor', 'product', 'default']
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_items.csv (header suppressed) and export listing columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.include_headers_line = False
    self.exporter.fields_to_export = [
        "url", "status", "date", "mls", "address", "price", "beds",
        "baths", "homesize", "lotsize", "description", "images",
    ]
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_items.csv and export the movie columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    # FIX: renamed local `list`, which shadowed the builtin
    fields = ['id', 'title', 'time', 'director', 'year', 'star', 'cost']
    self.exporter.fields_to_export = fields
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Log parking-lot snapshots to /tmp/<name>_log.csv without a header.

    BUG FIX: the original assigned the *string* 'false' to
    include_headers_line; any non-empty string is truthy, so the header
    row was still written. Use the boolean False.
    """
    out = open('/tmp/%s_log.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = ['_id', 'name', 'count', 'free',
                                      'timestamp', 'lat', 'lon', 'url']
    self.exporter.include_headers_line = False
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open mediabase.csv and export the company-directory columns."""
    out = open("mediabase.csv", 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        "Type", "Area", "PlaceName", "Web", "Tel", "Address", "Zip",
        "Town", "Hours", "CompanyName", "OrganizationNo", "Turnover",
        "Employed", "LastName", "FirstName", "Telephone",
        "AllabolagUrl", "EniroUrl",
    ]
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_FL_Orlando_20150716.csv and export the Zillow columns."""
    out = open("%s_FL_Orlando_20150716.csv" % spider.name, "w+b")
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = [
        "state", "city", "neighborhood", "zip_code", "listing_type",
        "property_type", "construction", "lot", "mls_number", "parcel",
        "price", "zestimate", "zestimate_rent", "built_in", "bedrooms",
        "baths", "address", "description", "listing_provided_by",
        "url", "timestamp",
    ]
    self.exporter.start_exporting()
def open_spider(self, spider):
    """Open wechat.csv with a UTF-8 BOM so Excel detects the encoding.

    BUG FIX: the file is opened in binary mode, so the original
    ``write('\\xEF\\xBB\\xBF')`` (a str) raised TypeError on Python 3;
    the BOM must be written as bytes.
    """
    self.file = open('wechat.csv', 'w+b')
    self.file.write(b'\xef\xbb\xbf')  # UTF-8 byte-order mark
    self.exporter = CsvItemExporter(self.file)
    self.exporter.fields_to_export = ['page', 'wID', 'wtitle', 'wsub', 'Link']
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open a timestamped CSV and export the product-feed columns."""
    out = open('%s_%s.csv' % (spider.name, int(time.time())), 'w+b')
    self.files[spider] = out
    columns = ['product_id', 'price', 'price_usd', 'currency',
               'when_created', 'source', 'title', 'heading',
               'url', 'in_stock', 'image']
    self.exporter = CsvItemExporter(out, fields_to_export=columns,
                                    dialect='excel')
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open a per-spider output CSV (match spiders get the match date).

    BUG FIX: CsvItemExporter writes bytes, so the files must be opened in
    binary mode; text mode ('w') raises TypeError on Python 3.
    """
    if isinstance(spider, MatchSpider):
        out = open('%s_asia_%s.csv' % (spider.name, spider.match_date), 'wb')
    else:
        out = open('%s_output.csv' % spider.name, 'wb')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open <name>_items.csv and export title/body columns."""
    out = open('%s_items.csv' % spider.name, 'w+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = ['title', 'body']
    self.exporter.start_exporting()
def assertExportResult(self, item, expected, **kwargs):
    """Export a single item through CsvItemExporter into an in-memory
    buffer and compare the produced CSV bytes against *expected*."""
    buffer = BytesIO()
    exporter = CsvItemExporter(buffer, **kwargs)
    exporter.start_exporting()
    exporter.export_item(item)
    exporter.finish_exporting()
    self.assertCsvEqual(buffer.getvalue(), expected)
class JnePipeline(object):
    """Route candidate and work-experience items to separate CSV files."""

    candidato_filename = './output/candidato.csv'
    experiencia_laboral_filename = './output/experiencia_laboral.csv'

    def __init__(self):
        self.candidato_file = open(self.candidato_filename, 'wb')
        self.candidato_exporter = CsvItemExporter(self.candidato_file)
        self.experiencia_laboral_file = open(self.experiencia_laboral_filename, 'wb')
        self.experiencia_laboral_exporter = CsvItemExporter(self.experiencia_laboral_file)

    def process_item(self, item, spider):
        """Dispatch the item to the exporter matching its type."""
        if isinstance(item, CandidatoItem):
            self.candidato_exporter.export_item(item)
        if isinstance(item, ExperienciaLaboralItem):
            self.experiencia_laboral_exporter.export_item(item)
        return item
def spider_opened(self, spider):
    """Append to a date-range-named CSV (semicolon-delimited, fully
    quoted, no header row) and export the address columns."""
    filename = "{}_{}_{}__{}_{}_{}.csv".format(
        spider.pdda, spider.pddm, spider.pddj,
        spider.pdfa, spider.pdfm, spider.pdfj)
    out = open(filename, 'a+b')
    self.files[spider] = out
    self.exporter = CsvItemExporter(out, include_headers_line=False,
                                    delimiter=';', quoting=csv.QUOTE_ALL)
    self.exporter.fields_to_export = ["name", "address", "zipcode",
                                      "city", "number", "date"]
    self.exporter.start_exporting()
def __init__(self):
    """Create the mobiles.csv exporter for BillionPricesIndiaItem fields."""
    self.files = {}
    mobiles_file = open("mobiles.csv", 'wb')
    self.exporter1 = CsvItemExporter(
        mobiles_file,
        fields_to_export=BillionPricesIndiaItem.fields.keys())

@classmethod
def from_crawler(cls, crawler):
    """Build the pipeline and hook it into the spider lifecycle signals."""
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
def test_header_export_two_items(self):
    """The header row must appear once even when two items are exported,
    for both Item and plain-dict inputs."""
    for item in [self.i, dict(self.i)]:
        buf = BytesIO()
        exporter = CsvItemExporter(buf)
        exporter.start_exporting()
        for _ in range(2):
            exporter.export_item(item)
        exporter.finish_exporting()
        self.assertCsvEqual(
            buf.getvalue(),
            'age,name\r\n22,John\xc2\xa3\r\n22,John\xc2\xa3\r\n')
def spider_opened(self, spider):
    """Open spider.output_file for export; on failure, shut the spider
    down with an explanatory message instead of crashing."""
    try:
        out = open(spider.output_file, 'w+b')
    except IOError as e:
        spider.crawler.engine.close_spider(
            spider, "ERROR: Can't create CSV file: " + str(e))
        return
    self.files[spider] = out
    self.exporter = CsvItemExporter(out)
    self.exporter.fields_to_export = settings.getlist("EXPORT_FIELDS")
    self.exporter.start_exporting()
class PointacrePipeline(object):
    """Dedupe user items by uid into users.csv; record login credentials
    in passwd.csv.

    BUG FIXES: ``dict.has_key()`` was removed in Python 3 — replaced with
    ``in``; and process_item only returned the item on the fall-through
    branch, so later pipeline stages received None for the user/login
    spiders — it now always returns the item (unless DropItem is raised).
    """

    # class-level uid registry shared across instances
    users = {}

    def __init__(self):
        self.user_file = open('users.csv', 'wb')
        self.user_exporter = CsvItemExporter(self.user_file)
        self.psw_file = open('passwd.csv', 'wb')
        self.psw_exporter = CsvItemExporter(self.psw_file)

    def process_item(self, item, spider):
        if spider.name == "1point3acres.user":
            if 'uid' in item and item['uid']:
                uid = item['uid'][0]
                if uid not in PointacrePipeline.users:
                    PointacrePipeline.users[uid] = True
                    self.user_exporter.export_item(item)
            else:
                raise DropItem()
        elif spider.name == "1point3acres.login":
            self.psw_exporter.export_item(item)
        return item
class ParkingCrawlerPipeline(object):
    """Export parking-lot items to /tmp/<spider>_log.csv with no header.

    BUG FIXES: the class defined ``process_item`` twice — the first, no-op
    definition was dead code shadowed by the second and has been removed;
    and ``include_headers_line`` was assigned the truthy *string* 'false',
    which did not suppress the header row — it is now the boolean False.
    """

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        out = open('/tmp/%s_log.csv' % spider.name, 'w+b')
        self.files[spider] = out
        self.exporter = CsvItemExporter(out)
        self.exporter.fields_to_export = ['_id', 'name', 'count', 'free',
                                          'timestamp', 'lat', 'lon', 'url']
        self.exporter.include_headers_line = False
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        out = self.files.pop(spider)
        out.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class CVSExport(object):
    """Export every item to postUGR_items.csv for the spider's lifetime."""

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Register the open/close signal handlers on construction."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        out = open('postUGR_items.csv', 'w+b')
        self.files[spider] = out
        self.exporter = CsvItemExporter(out)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class CsvExportPipeline(object):
    """Export description/phone pairs to a timestamped per-spider CSV."""

    def __init__(self):
        # legacy-style signal wiring via the dispatcher
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        out = open("%s_%s.csv" % (spider.name, int(time.time())), "w+b")
        self.files[spider] = out
        self.exporter = CsvItemExporter(
            out, fields_to_export=["description", "phone"])
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        if item is None:
            raise DropItem("None")
        self.exporter.export_item(item)
        return item
class CSVPipeline(object):
    """Append address records to a date-range-named CSV file
    (semicolon-delimited, fully quoted, no header row)."""

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Register the open/close signal handlers on construction."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        filename = "{}_{}_{}__{}_{}_{}.csv".format(
            spider.pdda, spider.pddm, spider.pddj,
            spider.pdfa, spider.pdfm, spider.pdfj)
        out = open(filename, 'a+b')
        self.files[spider] = out
        self.exporter = CsvItemExporter(out, include_headers_line=False,
                                        delimiter=';', quoting=csv.QUOTE_ALL)
        self.exporter.fields_to_export = ["name", "address", "zipcode",
                                          "city", "number", "date"]
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item