예제 #1
0
def get_data_set(i):
    """Build the combined data set for the entry returned by ``getPage(i)``.

    The first element of ``getPage(i)`` becomes the ``'main'`` record. A
    ``MyClass`` parser is then run over the project page, the campaigner
    bio, the backers and the updates; its ``stuff`` mapping is merged into
    the main record and the Facebook like count is stored under
    ``'fb_likes'``.

    Returns:
        A dict with the keys ``'main'``, ``'updates'`` and ``'backers'``.
        The backers and updates values are the parser's own ``backers`` /
        ``updates`` objects, each tagged with a ``'projectnr'`` entry
        holding the main record's ``'id'``.
    """
    main = getPage(i)[0]
    project_url = "http://www.idea.me" + main['url']

    scraper = MyClass()
    scraper.parse_project_page(project_url)
    scraper.parse_campaigner_bio(project_url)
    scraper.parse_backers(main['id'])
    scraper.parse_updates(main['id'])

    # Merge first, then read 'url' / 'id' again: the merged values are the
    # ones used for the like count and for the 'projectnr' tags below.
    main.update(scraper.stuff)
    main['fb_likes'] = scraper.get_fb_likes(main['url'])

    backers = scraper.backers
    backers['projectnr'] = main['id']

    updates = scraper.updates
    updates['projectnr'] = main['id']

    return {'main': main, 'updates': updates, 'backers': backers}
예제 #2
0
 def get_data_set(self,i): 
     """Collect the main, backers and updates data for page ``i``, with retries.

     Each attempt takes the first element of ``self.getPage(i)`` as the
     ``'main'`` record, runs a ``MyClass`` parser over the project page,
     campaigner bio, backers and updates, merges ``parser.stuff`` into the
     main record, stores the Facebook like count under ``'fb_likes'`` and
     tags the parser's ``backers`` / ``updates`` objects with
     ``'projectnr'``.

     Failure handling:
       * ``AttributeError`` / ``KeyError``: the attempt is retried after a
         random pause of up to 500 seconds (presumably a transient scrape
         failure -- confirm against the parser).
       * ``IndexError``: ``self.status`` is set to ``"empty"`` and the
         method stops retrying, so the status is not overwritten later and
         the page is not requested again without any delay.
       * After 100 unsuccessful attempts ``self.status`` is set to
         ``"Failed on Time-out"``.

     Returns:
         A dict with the keys ``'main'``, ``'updates'`` and ``'backers'``.
         When no attempt succeeds it may hold empty dicts or whatever
         partial data the last attempt stored before failing.
     """
     dataset={'main':{},'updates':{},'backers':{}}
     for _attempt in range(100):
         try:
             main=self.getPage(i)[0]
             dataset['main']=main
             project_url="http://www.idea.me"+main['url']

             parser=MyClass()
             parser.parse_project_page(project_url)
             parser.parse_campaigner_bio(project_url)
             parser.parse_backers(main['id'])
             parser.parse_updates(main['id'])
             main.update(parser.stuff)
             main['fb_likes']=parser.get_fb_likes(main['url'])

             dataset['backers']=parser.backers
             dataset['backers']['projectnr']=main['id']

             dataset['updates']=parser.updates
             dataset['updates']['projectnr']=main['id']
             break
         except (AttributeError,KeyError):
             # Sleep for exactly the duration that is logged; a second,
             # independent random draw would make the log line misleading.
             delay=random.random()*500
             print(str(self.threadID)+": campaign.sleep "+str(delay))
             time.sleep(delay)
         except IndexError:
             # Nothing to scrape: stop here so "empty" stays the final status.
             self.status="empty"
             break
     else:
         # Only reached when every attempt ended in a retryable error.
         self.status="Failed on Time-out"
     return dataset