Beispiel #1
0
 def process_item(self, item, spider):
     """Normalise the scraped fields and add a ``Repository`` to the session.

     ``update_time`` has any ``Z`` stripped from its ends and its ``T``
     separator replaced by a space before being parsed into a ``datetime``.
     Commas are removed from ``commits``, ``branches`` and ``releases``;
     those values remain strings.

     Returns the (mutated) item.
     """
     stamp = item['update_time'].strip('Z')
     item['update_time'] = datetime.strptime(
         stamp.replace("T", " "), '%Y-%m-%d %H:%M:%S')
     for field in ('commits', 'branches', 'releases'):
         item[field] = item[field].replace(",", "")
     record = Repository(**item)
     self.session.add(record)
     return item
Beispiel #2
0
 def process_item(self, item, spider):
     """Clean the scraped fields, add a ``Repository`` to the session, return the item.

     ``update_time`` is reduced to the part before ``T`` and parsed into a
     ``date``. ``commits``, ``branches`` and ``releases`` have their commas
     removed and are converted to ``int``.

     Raises:
         ValueError: if the date or one of the counters cannot be parsed.
     """
     date_text = item['update_time'].split('T')[0]
     item['update_time'] = datetime.strptime(date_text, '%Y-%m-%d').date()
     for field in ('commits', 'branches', 'releases'):
         item[field] = int(item[field].replace(',', ''))
     self.session.add(Repository(**item))
     # Pipeline hooks with this signature (e.g. Scrapy's) are expected to
     # return the item; without this, later stages would receive None.
     return item
Beispiel #3
0
 def process_item(self, item, spider):
     """Parse ``update_time`` into a ``datetime`` and add the item to the session.

     Any ``Z`` at the ends of the timestamp is stripped and ``T`` is
     replaced by a space before parsing with ``%Y-%m-%d %H:%M:%S``.
     Returns the (mutated) item.
     """
     stamp = item['update_time'].strip('Z').replace("T", " ")
     item['update_time'] = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
     record = Repository(**item)
     self.session.add(record)
     return item
Beispiel #4
0
 def process_item(self, item, spider):
     """Parse ``update_time`` into a ``datetime`` and add the item to the session.

     The timestamp is split on ``T``; the first piece is used as the date
     and the second piece, with ``Z`` stripped from its ends, as the time.
     Returns the (mutated) item.
     """
     pieces = item['update_time'].split('T')
     date_part = pieces[0]
     time_part = pieces[1].strip('Z')
     item['update_time'] = datetime.strptime(
         ' '.join((date_part, time_part)), '%Y-%m-%d %H:%M:%S')
     record = Repository(**item)
     self.session.add(record)
     return item
Beispiel #5
0
 def process_item(self, item, spider):
     """Convert the item's fields to native types and add it to the session.

     ``update_time`` is parsed with ``%Y-%m-%dT%H:%M:%SZ``; ``commits``,
     ``branches`` and ``releases`` are passed through ``int``.
     Returns the (mutated) item.
     """
     stamp = item['update_time']
     item['update_time'] = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%SZ')
     for counter in ('commits', 'branches', 'releases'):
         item[counter] = int(item[counter])
     record = Repository(**item)
     self.session.add(record)
     return item
Beispiel #6
0
 def process_item(self, item, spider):
     """Parse ``update_time`` into a ``datetime`` and add the item to the session.

     The timestamp is parsed with ``%Y-%m-%dT%H:%M:%S`` (no trailing ``Z``
     in the format). Returns the (mutated) item.
     """
     stamp = item['update_time']
     item['update_time'] = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
     record = Repository(**item)
     self.session.add(record)
     return item
Beispiel #7
0
 def process_item(self, item, spider):
     """Unwrap the scraped fields, add a ``Repository`` to the session, return the item.

     ``name`` is replaced by its first element. ``update_time`` is taken
     from its first element, cut at ``T`` and parsed into a ``date``.
     NOTE(review): presumably both fields arrive as one-element lists from
     the extractor -- confirm against the spider.

     Raises:
         ValueError: if the date text does not match ``%Y-%m-%d``.
     """
     item['name'] = item['name'][0]
     date_text = item['update_time'][0].split('T')[0]
     item['update_time'] = datetime.strptime(date_text, '%Y-%m-%d').date()
     self.session.add(Repository(**item))
     # Pipeline hooks with this signature (e.g. Scrapy's) are expected to
     # return the item; without this, later stages would receive None.
     return item
Beispiel #8
0
 def process_item(self, item, spider):
     """Convert the item's fields to native types and add it to the session.

     ``update_time`` is parsed with ``%Y-%m-%dT%H:%M:%SZ``. Each of
     ``commits``, ``branches`` and ``releases`` becomes ``0`` when it is
     ``None``; otherwise its commas are removed and it is converted to
     ``int``.

     Returns the (mutated) item.

     Raises:
         ValueError: if the timestamp or a counter cannot be parsed.
     """
     item['update_time'] = datetime.strptime(
         item['update_time'], '%Y-%m-%dT%H:%M:%SZ')
     # One loop for all three counters: the copy-pasted branches previously
     # read the misspelled key 'releasese' and raised KeyError.
     for field in ('commits', 'branches', 'releases'):
         raw = item[field]
         item[field] = 0 if raw is None else int(raw.replace(',', ''))
     self.session.add(Repository(**item))
     return item
Beispiel #9
0
 def process_item(self, item, spider):
     """Convert the item's fields to native types and add it to the session.

     ``update_time`` is parsed with ``%Y-%m-%dT%H:%M:%SZ``. Each of
     ``commits``, ``branches`` and ``releases`` that is not ``None`` has
     all of its commas removed and is converted to ``int``; ``None``
     values are left untouched.

     Returns the (mutated) item.

     Raises:
         ValueError: if the timestamp or a counter cannot be parsed.
     """
     item['update_time'] = datetime.strptime(
         item['update_time'], '%Y-%m-%dT%H:%M:%SZ')
     for field in ('commits', 'branches', 'releases'):
         raw = item[field]
         if raw is not None:
             # Remove every separator: joining only the first two
             # comma-separated groups turned "1,234,567" into 1234.
             item[field] = int(raw.replace(',', ''))
     self.session.add(Repository(**item))
     return item
Beispiel #10
0
    def process_item(self, item, spider):
        """Add a ``Repository`` built from the item to the session; return the item."""
        record = Repository(**item)
        self.session.add(record)
        return item
Beispiel #11
0
 def process_item(self, item, spider):
     """Reduce ``update_time`` to a ``date`` and add the item to the session.

     Only the text before the first ``T`` is parsed (``%Y-%m-%d``).
     Returns the (mutated) item.
     """
     day_text = item['update_time'].partition('T')[0]
     parsed = datetime.strptime(day_text, '%Y-%m-%d')
     item['update_time'] = parsed.date()
     record = Repository(**item)
     self.session.add(record)
     return item
Beispiel #12
0
 def process_item(self, item, spider):
     """Store ``ShiyanlougithubItem`` instances; pass every item through.

     For a ``ShiyanlougithubItem``, ``update_time`` is parsed with
     ``%Y-%m-%dT%H:%M:%SZ`` and a ``Repository`` built from the item is
     added to ``self.s``. Other items are returned unchanged.
     """
     if not isinstance(item, ShiyanlougithubItem):
         return item
     stamp = item['update_time']
     item['update_time'] = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%SZ')
     record = Repository(**item)
     self.s.add(record)
     return item
Beispiel #13
0
 def process_item(self, item, spider):
     """Convert the item's fields, add a ``Repository`` to the session, return the item.

     ``commits`` has its commas removed and is converted to ``int``;
     ``update_time`` is parsed with ``%Y-%m-%dT%H:%M:%SZ``.

     Raises:
         ValueError: if the counter or the timestamp cannot be parsed.
     """
     import logging

     item['commits'] = int(item['commits'].replace(',', ''))
     item['update_time'] = datetime.strptime(item['update_time'],
                                             '%Y-%m-%dT%H:%M:%SZ')
     # Debug trace goes through logging instead of an unconditional print,
     # so it can be silenced by log-level configuration.
     logging.getLogger(__name__).debug('processed item: %s', item)
     self.session.add(Repository(**item))
     # Pipeline hooks with this signature (e.g. Scrapy's) are expected to
     # return the item; without this, later stages would receive None.
     return item
0
 def _process_repo_item(self, item):
     """Parse ``update_time`` and add a ``Repository`` built from the item.

     The timestamp is parsed with ``%Y-%m-%dT%H:%M:%SZ``. Nothing is
     returned; the item is mutated in place.
     """
     stamp = item['update_time']
     item['update_time'] = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%SZ')
     record = Repository(**item)
     self.session.add(record)
Beispiel #15
0
 def process_item(self, item, spider):
     """Remove commas from ``commits`` and add the item to the session.

     ``commits`` stays a string; only its comma separators are dropped
     (e.g. ``"1,234"`` becomes ``"1234"``).

     Returns the (mutated) item.
     """
     # replace() removes interior separators; strip(',') only touched
     # leading/trailing commas and left values like "1,234" unchanged.
     item['commits'] = item['commits'].replace(',', '')
     self.session.add(Repository(**item))
     return item
Beispiel #16
0
 def process_item(self, item, spider):
     """Add a ``Repository`` built from the item to the session; return the item."""
     # Build the Repository (per the original note, declared in models.py)
     # from the item's fields and hand it to the database session.
     record = Repository(**item)
     self.session.add(record)
     return item
Beispiel #17
0
 def process_item(self, item, spider):
     """Parse ``update_time`` into a ``datetime`` and add the item to the session.

     The timestamp is parsed with ``%Y-%m-%dT%H:%M:%SZ``.
     Returns the (mutated) item.
     """
     stamp = item['update_time']
     item['update_time'] = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%SZ')
     record = Repository(**item)
     self.session.add(record)
     return item
Beispiel #18
0
 def process_item(self, item, spider):
     """Convert the counters to ``int`` and add the item to the session.

     Commas are removed from ``commits`` before conversion; ``branches``
     and ``releases`` are passed to ``int`` as they are.
     Returns the (mutated) item.
     """
     commits_text = item['commits'].replace(',', '')
     item['commits'] = int(commits_text)
     for counter in ('branches', 'releases'):
         item[counter] = int(item[counter])
     record = Repository(**item)
     self.session.add(record)
     return item