def get_data_set(i):
    """Assemble the scraped data set for the project at index ``i``.

    The first element returned by ``getPage(i)`` is used as the main record.
    A ``MyClass`` parser is then run over the project's idea.me URL and id,
    and its results are folded into the returned structure.

    Returns a dict with three entries:
      * ``'main'``    -- the page record, merged with ``parser.stuff`` and
                         extended with an ``'fb_likes'`` entry.
      * ``'updates'`` -- ``parser.updates`` tagged with ``'projectnr'``.
      * ``'backers'`` -- ``parser.backers`` tagged with ``'projectnr'``.

    Any exception raised by ``getPage`` or the parser propagates to the caller.
    """
    project = getPage(i)[0]

    scraper = MyClass()
    project_url = "http://www.idea.me" + project['url']
    scraper.parse_project_page(project_url)
    scraper.parse_campaigner_bio(project_url)
    scraper.parse_backers(project['id'])
    scraper.parse_updates(project['id'])

    # Merge the parsed fields first; 'url' and 'id' are deliberately re-read
    # from the merged record afterwards, because the merge may replace them.
    project.update(scraper.stuff)
    project['fb_likes'] = scraper.get_fb_likes(project['url'])

    backers = scraper.backers
    backers['projectnr'] = project['id']
    updates = scraper.updates
    updates['projectnr'] = project['id']

    return {'main': project, 'updates': updates, 'backers': backers}
def get_data_set(self, i):
    """Assemble the scraped data set for the project at index ``i``, with retries.

    Each attempt takes the first element of ``self.getPage(i)`` as the main
    record, runs a ``MyClass`` parser over the project's idea.me URL and id,
    and folds the parser's results into the returned structure.

    Outcomes:
      * Success -- the filled data set is returned; ``self.status`` is left
        untouched.
      * ``AttributeError`` / ``KeyError`` during an attempt -- the attempt is
        retried after a random back-off of up to 500 seconds, for at most
        100 attempts. When every attempt fails, ``self.status`` is set to
        ``"Failed on Time-out"``.
      * ``IndexError`` (``getPage`` produced nothing to index) --
        ``self.status`` is set to ``"empty"`` and the method stops retrying.

    Returns a dict with the keys ``'main'``, ``'updates'`` and ``'backers'``.
    After a failure it holds whatever the last attempt managed to store
    (empty dicts if nothing was stored). Other exception types propagate.
    """
    max_attempts = 100
    dataset = {'main': {}, 'updates': {}, 'backers': {}}

    for _ in range(max_attempts):
        try:
            dataset['main'] = self.getPage(i)[0]
            parser = MyClass()
            project_url = "http://www.idea.me" + dataset['main']['url']
            parser.parse_project_page(project_url)
            parser.parse_campaigner_bio(project_url)
            parser.parse_backers(dataset['main']['id'])
            parser.parse_updates(dataset['main']['id'])
            # 'url' and 'id' are re-read after the merge on purpose: the
            # parsed fields may replace them.
            dataset['main'].update(parser.stuff)
            dataset['main']['fb_likes'] = parser.get_fb_likes(dataset['main']['url'])
            dataset['backers'] = parser.backers
            dataset['backers']['projectnr'] = dataset['main']['id']
            dataset['updates'] = parser.updates
            dataset['updates']['projectnr'] = dataset['main']['id']
            break
        except (AttributeError, KeyError):
            # Sleep for exactly the delay that is logged, so the log line
            # reflects the real wait.
            delay = random.random() * 500
            print(str(self.threadID) + ": campaign.sleep " + str(delay))
            time.sleep(delay)
        except IndexError:
            # Nothing to index: retrying immediately would only repeat the
            # request, and the "empty" status would be overwritten by the
            # time-out status once the attempts ran out.
            self.status = "empty"
            break
    else:
        # Loop ended without a break: every attempt failed and was retried.
        self.status = "Failed on Time-out"

    return dataset