Example no. 1
    def process_item(self, item, spider):
        try:
            item['news_website'] = spider.ref_object
            item['search_term'] = spider.search_terms  # added so we can see which search term produced each item
            
            checker_rt = SchedulerRuntime(runtime_type='C')
            checker_rt.save()
            item['checker_runtime'] = checker_rt
            item.save()


#            p1 = Den.objects.get(title='baby')
#            a1 = Article(search_term=spider.search_terms)
#            a1.dens.add(p1)


#            busi = item.save(commit=False)
#            p1 = Den.objects.get(title='pretty')
#            busi.dens.add(p1)
#            p1 = Den(title='pretty')
#            a1 = Article(search_term=spider.search_terms)
#            a1.dens.add(p1)
#            p1 = Den(title='pretty')
#            a1 = Article(search_term=spider.search_terms)
#            a1.dens.add(p1)

            spider.action_successful = True
            spider.log("Item saved.", log.INFO)
                
        except IntegrityError as e:
            spider.log(str(e), log.ERROR)
            raise DropItem("Missing attribute.")
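The commented-out lines above hint at linking the saved article to one or more Den objects through a many-to-many field. A minimal sketch of that idea, assuming Article.dens is an M2M field and that item.save() returns the saved model instance (as scrapy-djangoitem's DjangoItem.save() does), could look like this:

    # Hypothetical follow-up to item.save() inside the try block
    article = item.save()                             # returns the saved Article instance
    den, _ = Den.objects.get_or_create(title='baby')  # look up or create the Den
    article.dens.add(den)                             # attach it via the M2M relation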
 def test_double_standard_id_field(self):
     checker_rt = SchedulerRuntime()
     checker_rt.save()
     event = Event(title=u'Event 1', event_website=self.event_website,
         description=u'Event 1 description',
         url=u'http://localhost:8010/static/site_generic/event5.html',
         checker_runtime=checker_rt)
     event.save()
     event = Event(title=u'Event 2', event_website=self.event_website,
         description=u'Event 1 description',
         url=u'http://localhost:8010/static/site_generic/event6.html',
         checker_runtime=checker_rt)
     event.save()
     event = Event(title=u'Event 1', event_website=self.event_website,
         description=u'Event 2 description',
         url=u'http://localhost:8010/static/site_generic/event7.html',
         checker_runtime=checker_rt)
     event.save()
     self.soa_url.id_field = False
     self.soa_url.save()
     self.soa_title.id_field = True
     self.soa_title.save()
     self.soa_desc.id_field = True
     self.soa_desc.save()
     self.run_event_spider(1)
     
     self.assertEqual(len(Event.objects.all()), 6)
     self.assertEqual(Event.objects.filter(description='Event 1 description').count(), 2)
Example no. 3
    def process_item(self, item, spider):
        if spider.conf['DO_ACTION']:  # Necessary since DDS v.0.9+
            try:
                item['races_website'] = spider.ref_object

                checker_rt = SchedulerRuntime(runtime_type='C')
                checker_rt.save()
                item['checker_runtime'] = checker_rt
                item['date'] = self.process_date(item['date'], spider)
                item['city'] = self.process_city(item['city'], spider)
                item['province'] = self.process_province(item['province'], spider)
                item.save()
                spider.action_successful = True
                spider.log("Item saved.", logging.INFO)

            except IntegrityError as e:
                spider.log(str(e), logging.ERROR)
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")
        else:
            if not item.is_valid():
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")

        return item
Example no. 4
    def process_item(self, item, spider):
        try:

            checker_rt = SchedulerRuntime(runtime_type='C')
            checker_rt.save()
            item['checker_runtime'] = checker_rt
            item['source'] = spider.ref_object

            try:
                item_model = item_to_model(item)
            except TypeError:
                return item
            
            model, created = get_or_create(item_model)
            
            update_model(model, item_model)
            
            if created:
                spider.log('==' + model.name + '== created.', log.INFO)
                
            else:
                spider.log('==' + model.name + '== updated.', log.INFO)

            spider.action_successful = True

        except IntegrityError as e:
            spider.log(str(e), log.ERROR)
            raise DropItem("Missing attribute.")
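This pipeline relies on item_to_model, get_or_create and update_model helpers that are not shown here. A minimal sketch of such helpers, assuming the item is a DjangoItem and that url is the field used for the uniqueness lookup (both assumptions, not part of the original code), might look like this:

    from django.forms.models import model_to_dict

    def item_to_model(item):
        # Assumes a DjangoItem; the bound model instance is exposed as item.instance
        model_class = getattr(item, 'django_model', None)
        if not model_class:
            raise TypeError("Item is not a DjangoItem")
        return item.instance

    def get_or_create(model):
        model_class = type(model)
        created = False
        try:
            # Assumes `url` uniquely identifies a record
            obj = model_class.objects.get(url=model.url)
        except model_class.DoesNotExist:
            created = True
            obj = model  # not saved to the DB yet
        return (obj, created)

    def update_model(destination, source, commit=True):
        # Copy field values from source onto destination, keeping destination's pk
        pk = destination.pk
        for key, value in model_to_dict(source).items():
            setattr(destination, key, value)
        destination.pk = pk
        if commit:
            destination.save()
        return destination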
    def process_item(self, item, spider):
        if spider.conf['DO_ACTION']:  # Necessary since DDS v.0.9+
            try:
                item['source'] = spider.ref_object

                checker_rt = SchedulerRuntime(runtime_type='C')
                checker_rt.save()
                item['checker_runtime'] = checker_rt

                item.save()
                spider.action_successful = True
                dds_id_str = str(item._dds_item_page) + '-' + str(
                    item._dds_item_id)
                spider.struct_log(
                    "{cs}Item {id} saved to Django DB.{ce}".format(
                        id=dds_id_str,
                        cs=spider.bcolors['OK'],
                        ce=spider.bcolors['ENDC']))

            except IntegrityError as e:
                spider.log(str(e), logging.ERROR)
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")
        else:
            if not item.is_valid():
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")

        return item
Example no. 6
 def process_item(self, item, spider):
     
     if isinstance(spider,ProductSpider):
         #spider.log("spider: " + spider.name)
         spider.log("item time is: " + item['time'])
         item['time']=process_date(item['time'])
         # to do:
         # drop item if price is null
         # drop item if time > no            
     try:
         #if (item == ArticleItem):
             #item['news_website'] = spider.ref_object
         #else:
         item['source'] = spider.ref_object
         
         checker_rt = SchedulerRuntime(runtime_type='C')
         checker_rt.save()
         item['checker_runtime'] = checker_rt
         
         item.save()
         spider.action_successful = True
         spider.log("Item saved.", log.INFO)           
             
     except IntegrityError as e:
         spider.log(str(e), log.ERROR)
         raise DropItem("Missing attribute.")
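The to-do notes in this pipeline (dropping items with a missing price or an unwanted time) are left unimplemented. A minimal sketch for the price check, assuming the item exposes a 'price' field (an assumption, not shown in the original), could sit right after the date processing:

    if isinstance(spider, ProductSpider):
        item['time'] = process_date(item['time'])
        # Drop the item early if no price was scraped (hypothetical 'price' field)
        if not item.get('price'):
            raise DropItem("Price is missing.")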
Example no. 7
    def process_item(self, item, spider):

        if spider.conf['DO_ACTION']:

            try:
                item['website'] = spider.ref_object

                checker_rt = SchedulerRuntime(runtime_type='C')
                checker_rt.save()
                item['checker_runtime'] = checker_rt

                item.save()
                spider.action_successful = True
                spider.log("Item saved.", logging.INFO)

            except IntegrityError as e:
                spider.log(str(e), logging.ERROR)
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")
        else:
            if not item.is_valid():
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")

        return item
    def process_item(self, item, spider):
        if spider.conf['DO_ACTION']:  # Necessary since DDS v.0.9+
            try:
                item['news_website'] = spider.ref_object
                if 'description' in item:
                    item['description'] = convert_Html_to_text_and_make_sumarization(item['description'])
                if 'image' in item:
                    item['image'] = change_image_size(item['image'],spider.ref_object.name)


                checker_rt = SchedulerRuntime(runtime_type='C')
                checker_rt.save()
                item['checker_runtime'] = checker_rt

                item.save()
                spider.action_successful = True
                dds_id_str = str(item._dds_item_page) + '-' + str(item._dds_item_id)
                spider.struct_log("{cs}Item {id} saved to Django DB.{ce}".format(
                    id=dds_id_str,
                    cs=spider.bcolors['OK'],
                    ce=spider.bcolors['ENDC']))

            except IntegrityError as e:
                spider.log(str(e), logging.ERROR)
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")
        else:
            if not item.is_valid():
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")

        return item
 def test_double(self):
     checker_rt = SchedulerRuntime()
     checker_rt.save()
     event = Event(title=u'Event 1', url=u'http://localhost:8010/static/site_generic/event1.html',
         checker_runtime=checker_rt)
     event.save()
     self.run_event_spider(1)
     
     self.assertEqual(len(Event.objects.all()), 4)
     self.assertEqual(len(Event.objects.filter(title='Event 1')), 1)
def add_listing_checker(listing):
  listing_source_cfg = ListingSourceScraperConfig.objects.get(pk=listing.listing_source_id)

  checker_rt = SchedulerRuntime(runtime_type='C', next_action_time=timezone.now() + timedelta(days=1))
  checker_rt.save()

  checker_config = ListingCheckerConfig(listing=listing, checker_runtime=checker_rt, scraper=listing_source_cfg.scraper)
  checker_config.save()

  return checker_config
 def test_detail_page_url_id_field(self):
     checker_rt = SchedulerRuntime()
     checker_rt.save()
     event = Event(title=u'Event 1', event_website=self.event_website, 
         url=u'http://localhost:8010/static/site_generic/event5.html',
         checker_runtime=checker_rt)
     event.save()
     self.run_event_spider(1)
     
     self.assertEqual(len(Event.objects.all()), 5)
     self.assertEqual(Event.objects.filter(title='Event 1').count(), 2)
 def test_double(self):
     checker_rt = SchedulerRuntime()
     checker_rt.save()
     event = Event(title=u'Event 1', event_website=self.event_website, 
         url=u'http://localhost:8010/static/site_generic/event1.html',
         checker_runtime=checker_rt)
     event.save()
     self.run_event_spider(1)
     
     self.assertEqual(len(Event.objects.all()), 4)
     self.assertEqual(len(Event.objects.filter(title='Event 1')), 1)
 def setUp(self):
     super(CheckerRunTest, self).setUp()
     
     self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
     self.scraper.checker_x_path_result = u'Event was deleted!'
     self.scraper.save()
     
     scheduler_rt = SchedulerRuntime()
     scheduler_rt.save()
     
     self.event = Event(title='Event 1', event_website=self.event_website,
         description='Event 1 description', 
         url='http://localhost:8010/static/site_for_checker/event1.html',
         checker_runtime=scheduler_rt)
     self.event.save()
Example no. 14
    def process_item(self, item, spider):
        try:
            item['news_website'] = spider.ref_object

            checker_rt = SchedulerRuntime(runtime_type='C')
            checker_rt.save()
            item['checker_runtime'] = checker_rt

            item.save()
            spider.action_successful = True
            spider.log("Item saved.", log.INFO)

        except IntegrityError as e:
            spider.log(str(e), log.ERROR)
            raise DropItem("Missing attribute.")
Example no. 16
	def process_item(self, item, spider):
		if spider.conf['DO_ACTION']:
			try:
				item['job_website'] = spider.ref_object

				checker_rt = SchedulerRuntime(runtime_type='C')
				checker_rt.save()
				item['checker_runtime'] = checker_rt
				item.save()
				spider.action_successful = True
				spider.log("Items saved in the DB", logging.INFO)

			except IntegrityError as e:
				spider.log(str(e), logging.ERROR)
				raise DropItem("missing attrib")
 def test_standard_update_field_update(self):
     checker_rt = SchedulerRuntime()
     checker_rt.save()
     event = Event(title=u'Event 1 - Old Title', event_website=self.event_website, 
         url=u'http://localhost:8010/static/site_generic/event1.html',
         checker_runtime=checker_rt)
     event.save()
     self.soa_title.attr_type = 'T'
     self.soa_title.save()
     
     self.run_event_spider(1)
     
     event_updated = Event.objects.get(pk=event.id)
     self.assertEqual(event_updated.title, 'Event 1')
     self.assertEqual(len(Event.objects.filter(title='Event 1 - Old Title')), 0)
 def extraSetUpHTMLChecker(self):
     self.scraper.checker_type = 'X'
     self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
     self.scraper.checker_x_path_result = u'Event not found!'
     self.scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.html'
     self.scraper.save()
     
     scheduler_rt = SchedulerRuntime()
     scheduler_rt.save()
     
     self.event = Event(title='Event 1', event_website=self.event_website,
         description='Event 1 description', 
         url='http://localhost:8010/static/site_with_json_content_type/event_not_found.html',
         checker_runtime=scheduler_rt)
     self.event.save()
Example no. 19
 def test_single_standard_id_field(self):
     checker_rt = SchedulerRuntime()
     checker_rt.save()
     event = Event(title='Event 1', event_website=self.event_website, 
         url='http://localhost:8010/static/site_generic/event5.html',
         checker_runtime=checker_rt)
     event.save()
     self.soa_url.id_field = False
     self.soa_url.save()
     self.soa_title.id_field = True
     self.soa_title.save()
     self.run_event_spider(1)
     
     self.assertEqual(len(Event.objects.all()), 4)
     self.assertEqual(Event.objects.filter(title='Event 1').count(), 1)
    def setUpScraperJSChecker(self, path):
        super(ScraperJSRunTest, self).setUp()

        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_ref_url = u'%ssite_with_js/event_not_found.html' % path
        self.scraper.save()
        
        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()
        
        self.event = Event(title='Event 1', event_website=self.event_website,
            description='Event 1 description', 
            url='%ssite_with_js/event_not_found.html' % path,
            checker_runtime=scheduler_rt)
        self.event.save()
Example no. 21
    def process_item(self, item, spider):
        try:
            # This name must match Article's source
            item["source"] = spider.ref_object

            checker_rt = SchedulerRuntime(runtime_type="C")
            checker_rt.save()
            item["checker_runtime"] = checker_rt

            item.save()
            spider.action_successful = True
            spider.log("Item saved.", log.INFO)

        except IntegrityError as e:
            spider.log(str(e), log.ERROR)
            raise DropItem("Missing attribute.")
 def extraSetUpJSONChecker(self):
     self.scraper.detail_page_content_type = 'J'
     self.scraper.checker_type = 'X'
     self.scraper.checker_x_path = u'event_not_found'
     self.scraper.checker_x_path_result = u'Event not found!'
     self.scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.json'
     self.scraper.save()
     
     scheduler_rt = SchedulerRuntime()
     scheduler_rt.save()
     
     self.event = Event(title='Event 1', event_website=self.event_website,
         description='Event 1 description', 
         url='http://localhost:8010/static/site_with_json_content_type/event_not_found.json',
         checker_runtime=scheduler_rt)
     self.event.save()
Example no. 23
    def setUpScraperJSChecker(self, path):
        super(ScraperJSRunTest, self).setUp()

        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_ref_url = u'%ssite_with_js/event_not_found.html' % path
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(title='Event 1',
                           event_website=self.event_website,
                           description='Event 1 description',
                           url='%ssite_with_js/event_not_found.html' % path,
                           checker_runtime=scheduler_rt)
        self.event.save()
Example no. 24
    def extraSetUpHTMLChecker(self):
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event not found!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.html'
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_with_json_content_type/event_not_found.html',
            checker_runtime=scheduler_rt)
        self.event.save()
Example no. 25
    def process_item(self, item, spider):
        if spider.conf['DO_ACTION']:  # Necessary since DDS v.0.9+
            try:
                print('HJ start saving')
                item['post_site'] = spider.ref_object

                checker_rt = SchedulerRuntime(runtime_type='C')
                checker_rt.save()
                item['checker_runtime'] = checker_rt

                print(item['foo'])

                if len(item['foo']) != 0:
                    selector = Selector(text=item['foo'])
                    options = selector.xpath('//option')
                    option_items = []
                    for option in options:
                        option_items.append(option.xpath("text()").extract())
                    print(option_items)
                    option_items.pop(0)
                    item['foo'] = option_items
                    # item['foo'] = json.dumps(option_items)

                print(item['foo'])
                item.save()
                spider.action_successful = True
                dds_id_str = str(item._dds_item_page) + '-' + str(item._dds_item_id)
                spider.struct_log("{cs}Item {id} saved to Django DB.{ce}".format(
                    id=dds_id_str,
                    cs=spider.bcolors['OK'],
                    ce=spider.bcolors['ENDC']))

            except IntegrityError as e:
                print('HJ integrity error')
                spider.log(str(e), logging.ERROR)
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")
        else:
            print('HJ not do_action')
            if not item.is_valid():
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")

        return item
Example no. 26
    def extraSetUpJSONChecker(self):
        self.scraper.detail_page_content_type = 'J'
        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'event_not_found'
        self.scraper.checker_x_path_result = u'Event not found!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_with_json_content_type/event_not_found.json'
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_with_json_content_type/event_not_found.json',
            checker_runtime=scheduler_rt)
        self.event.save()
Example no. 27
    def setUp(self):
        super(CheckerRunTest, self).setUp()

        self.scraper.checker_type = 'X'
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u'Event was deleted!'
        self.scraper.checker_ref_url = u'http://localhost:8010/static/site_for_checker/event_not_found.html'
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title='Event 1',
            event_website=self.event_website,
            description='Event 1 description',
            url='http://localhost:8010/static/site_for_checker/event1.html',
            checker_runtime=scheduler_rt)
        self.event.save()
    def setUp(self):
        super(CheckerRunTest, self).setUp()

        self.scraper.checker_type = "X"
        self.scraper.checker_x_path = u'//div[@class="event_not_found"]/div/text()'
        self.scraper.checker_x_path_result = u"Event was deleted!"
        self.scraper.checker_ref_url = u"http://localhost:8010/static/site_for_checker/event_not_found.html"
        self.scraper.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title="Event 1",
            event_website=self.event_website,
            description="Event 1 description",
            url="http://localhost:8010/static/site_for_checker/event1.html",
            checker_runtime=scheduler_rt,
        )
        self.event.save()
Example no. 29
    def process_item(self, item, spider):
        try:
            if isinstance(spider.ref_object, LoanScraper):
                item["loan_scraper"] = spider.ref_object
            elif isinstance(spider.ref_object, InsuranceWebsite):
                item["insurance_website"] = spider.ref_object
            else:
                item["news_website"] = spider.ref_object

            checker_rt = SchedulerRuntime(runtime_type="C")
            checker_rt.save()
            item["checker_runtime"] = checker_rt

            item.save()
            spider.action_successful = True
            spider.log("Item saved.", log.INFO)

        except IntegrityError as e:
            spider.log(str(e), log.ERROR)
            raise DropItem("Missing attribute.")
    def extraSetUpHTMLChecker(self):
        self.checker = Checker()
        self.checker.scraped_obj_attr = self.soa_url
        self.checker.scraper = self.scraper
        self.checker.checker_type = "X"
        self.checker.checker_x_path = '//div[@class="event_not_found"]/div/text()'
        self.checker.checker_x_path_result = "Event not found!"
        self.checker.checker_ref_url = "http://localhost:8010/static/site_with_json_content_type/event_not_found.html"
        self.checker.save()

        scheduler_rt = SchedulerRuntime()
        scheduler_rt.save()

        self.event = Event(
            title="Event 1",
            event_website=self.event_website,
            description="Event 1 description",
            url="http://localhost:8010/static/site_with_json_content_type/event_not_found.html",
            checker_runtime=scheduler_rt,
        )
        self.event.save()
Example no. 31
 def process_item(self, item, spider):
     if spider.conf['DO_ACTION']:
         try:
             item['news_website'] = spider.ref_object
             
             checker_rt = SchedulerRuntime(runtime_type='C')
             checker_rt.save()
             item['checker_runtime'] = checker_rt
             
             item.save()
             spider.action_successful = True
             spider.struct_log("{cs}Item {id} saved to Django DB.{ce}".format(
                 id=item._dds_id_str,
                 cs=spider.bcolors['OK'],
                 ce=spider.bcolors['ENDC']))
                 
         except IntegrityError as e:
             spider.log(str(e), logging.ERROR)
             raise DropItem("Missing attribute.")
             
     return item
Example no. 32
    def process_item(self, item, spider):
        if spider.conf['DO_ACTION']:
            try:
                item['food_website'] = spider.ref_object

                checker_rt = SchedulerRuntime(runtime_type='C')
                checker_rt.save()
                item['checker_runtime'] = checker_rt

                item.save()
                spider.action_successful = True
                spider.struct_log(
                    "{cs}Item {id} saved to Django DB.{ce}".format(
                        id=item._dds_id_str,
                        cs=spider.bcolors['OK'],
                        ce=spider.bcolors['ENDC']))

            except IntegrityError as e:
                spider.log(str(e), logging.ERROR)
                raise DropItem("Missing attribute.")

        return item
Example no. 33
	def process_item(self, item, spider):
		'''
		Processing step from the spider into the Django ORM/database: based on the
		spider configuration, the scraped information is saved to the DB, following
		the same rules and principles as the Scrapy pipeline model. If an integrity
		error occurs, the item is dropped with a "missing attrib" error.
		'''
		
		if spider.conf['DO_ACTION']:
			try:
				item['news_website'] = spider.ref_object

				checker_rt = SchedulerRuntime(runtime_type='C')
				checker_rt.save()
				item['checker_runtime'] = checker_rt
				item.save()
				spider.action_successful = True
				spider.log("Items saved in the DB", logging.INFO)

			except IntegrityError as e:
				spider.log(str(e), logging.ERROR)
				raise DropItem("missing attrib")
Example no. 34
    def process_item(self, item, spider):
        if spider.conf['DO_ACTION']: #Necessary since DDS v.0.9+
            try:

                item['source'] = spider.ref_object

                checker_rt = SchedulerRuntime(runtime_type='C')
                checker_rt.save()
                item['checker_runtime'] = checker_rt
                if 'started_at' in item:
                    item['started_at'] = item['started_at'] + '-01-01'
                item['industy'], created = Industy.objects.get_or_create(name=item['industy'])

                try:
                    item_model = item_to_model(item)
                except TypeError:
                    return item

                model, created = get_or_create(item_model)

                model.tags_raw = ''
                model.save()
            
                tags = ast.literal_eval(item['tags_raw'].encode('utf-8'))
                tag_objs = []
                for tag in tags:
                    tag_name = tag['tag_name']
                    tag_obj, tag_created = Tag.objects.get_or_create(name=tag_name)
                    if tag:
                        tag_obj.save()
                        tag_objs.append(tag_obj)

                if tag_objs:
                    model.tags.add(*tag_objs)

                model.tags_raw = ','.join(t.name for t in tag_objs)


                # Save investment (funding) information
                invest_firm = ''
                investments = ast.literal_eval(item['investment_raw'].encode('utf-8'))                
                for i in investments:

                    invest_date = '-'.join(str(i) for i in [i['invse_year'], i['invse_month'],i['invse_month']])

                    if i['invse_detail_money'] == 0:
                        invest_amount = i['invse_similar_money']['invse_similar_money_name'] + i['invse_currency']['invse_currency_name']
                    else:
                        invest_amount = str(i['invse_detail_money']) + i['invse_currency']['invse_currency_name']

                    if i['invse_rel_invst_name']:
                        invest_firm = i['invse_rel_invst_name']
                    else:
                        invest_firm = ' '.join([org['invst_name'] for org in i['invse_orags_list']])



                    invest_round = i['invse_round']['invse_round_name']
                    investment, investment_created = Investment.objects.get_or_create(invest_date = invest_date,
                        invest_firm = invest_firm,
                        invest_round = invest_round,
                        invest_amount = invest_amount,
                        invest_to = model)

                    investment.save()

                model.investment_raw = invest_firm
                model.save()

                # backup
                # save tags_soup
                # tags_soup = Soup(item['tags_raw'], 'lxml')
                # tags = []
                # for tag_soup in select(tags_soup, 'a span'):
                #     tag = tag_soup.string
                #     tag_obj, tag_created = Tag.objects.get_or_create(name=tag)
                #     if tag:
                #         tag_obj.save()
                #         tags.append(tag_obj)

                # if tags:
                #     model.tags.add(*tags)

                # model.tags_raw = ','.join(t.name for t in tags)


                #save investment
                # soup = Soup(item['investment_raw'], 'lxml')
                # invest_firm = ''
                # for investment_soup in soup.find_all('tr'):
                #     invest_date = select(investment_soup, 'span.date')[0].string.replace('.', '-')
                #     invest_amount = select(investment_soup, 'span.finades a')[0].string

                #     tds = select(investment_soup, 'td')
                #     if tds[3]:
                #         invest_firm = ','.join(i.string for i in tds[3].find_all('a'))

                #     invest_round = select(investment_soup, 'span.round a')[0].string
                #     investment, investment_created = Investment.objects.get_or_create(invest_date = invest_date,
                #         invest_firm = invest_firm,
                #         invest_round = invest_round,
                #         invest_amount = invest_amount,
                #         invest_to = model)

                #     investment.save()

                # model.investment_raw = invest_firm
                

                if created:
                    spider.log('==' + model.name + '== created.', log.INFO)
                    
                else:
                    spider.log('==' + model.name + '== updated.', log.INFO)

                spider.action_successful = True
                
            except IntegrityError as e:
                spider.log(str(e), logging.ERROR)
                spider.log(str(item._errors), logging.ERROR)
                raise DropItem("Missing attribute.")