def process_item(self, item, spider):
    """Clean the scraped fields in place and queue the item for storage.

    ``update_time`` has any ``Z`` characters removed from both ends and its
    ``T`` replaced by a space, then is parsed as ``%Y-%m-%d %H:%M:%S`` into
    a ``datetime``.  Commas are removed from ``commits``, ``branches`` and
    ``releases``; those values remain strings.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.
    """
    stamp = item['update_time'].strip('Z')
    stamp = stamp.replace("T", " ")
    item['update_time'] = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')

    # Same order as before: commits, then branches, then releases.
    for key in ('commits', 'branches', 'releases'):
        item[key] = item[key].replace(",", "")

    record = Repository(**item)
    self.session.add(record)
    return item
def process_item(self, item, spider):
    """Convert the scraped fields and queue the item for storage.

    ``update_time`` is reduced to the text before the first ``T`` and parsed
    as ``%Y-%m-%d`` into a ``date``.  ``commits``, ``branches`` and
    ``releases`` have their commas removed and are converted to ``int``.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.

    Raises:
        ValueError: if the date text or one of the counts cannot be parsed.
    """
    date_text = item['update_time'].split('T')[0]
    item['update_time'] = datetime.strptime(date_text, '%Y-%m-%d').date()

    for field in ('commits', 'branches', 'releases'):
        item[field] = int(item[field].replace(',', ''))

    self.session.add(Repository(**item))
    # NOTE(review): the signature matches an item-pipeline hook; returning
    # the item (instead of the implicit None) lets a following stage use it.
    return item
def process_item(self, item, spider):
    """Parse ``update_time`` into a ``datetime`` and queue the item.

    The timestamp text has ``Z`` characters removed from both ends and ``T``
    replaced by a space before being parsed as ``%Y-%m-%d %H:%M:%S``.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.
    """
    text = item['update_time'].strip('Z').replace("T", " ")
    parsed = datetime.strptime(text, '%Y-%m-%d %H:%M:%S')
    item['update_time'] = parsed

    row = Repository(**item)
    self.session.add(row)
    return item
def process_item(self, item, spider):
    """Parse ``update_time`` into a ``datetime`` and queue the item.

    The raw value is split on ``T``; the first piece and the second piece
    (with ``Z`` removed from both ends) are joined by a space and parsed as
    ``%Y-%m-%d %H:%M:%S``.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.
    """
    pieces = item['update_time'].split('T')
    day_part = pieces[0]
    clock_part = pieces[1].strip('Z')
    item['update_time'] = datetime.strptime(
        day_part + ' ' + clock_part, '%Y-%m-%d %H:%M:%S')

    row = Repository(**item)
    self.session.add(row)
    return item
def process_item(self, item, spider):
    """Convert the scraped fields and queue the item for storage.

    ``update_time`` is parsed with the format ``%Y-%m-%dT%H:%M:%SZ`` into a
    ``datetime``.  ``commits``, ``branches`` and ``releases`` are converted
    to ``int``; string values may contain ``,`` thousands separators (for
    example ``"1,234"``), which are removed first.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.

    Raises:
        ValueError: if the timestamp or a count cannot be parsed.
    """
    item['update_time'] = datetime.strptime(
        item['update_time'], '%Y-%m-%dT%H:%M:%SZ')

    for field in ('commits', 'branches', 'releases'):
        raw = item[field]
        # Only strings can carry separators; anything else goes to int()
        # exactly as before.
        if isinstance(raw, str):
            raw = raw.replace(',', '')
        item[field] = int(raw)

    self.session.add(Repository(**item))
    return item
def process_item(self, item, spider):
    """Parse ``update_time`` into a ``datetime`` and queue the item.

    The value is parsed with the format ``%Y-%m-%dT%H:%M:%S``; the format
    contains no trailing ``Z``.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.
    """
    raw_stamp = item['update_time']
    item['update_time'] = datetime.strptime(raw_stamp, '%Y-%m-%dT%H:%M:%S')

    row = Repository(**item)
    self.session.add(row)
    return item
def process_item(self, item, spider):
    """Unwrap the scraped fields, parse the date and queue the item.

    ``name`` is replaced by its first element.  The first element of
    ``update_time`` is cut at the first ``T`` and parsed as ``%Y-%m-%d``
    into a ``date``.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.

    Raises:
        ValueError: if the date text cannot be parsed.
    """
    item['name'] = item['name'][0]

    date_text = item['update_time'][0].split('T')[0]
    item['update_time'] = datetime.strptime(date_text, '%Y-%m-%d').date()

    self.session.add(Repository(**item))
    # NOTE(review): the signature matches an item-pipeline hook; returning
    # the item (instead of the implicit None) lets a following stage use it.
    return item
def process_item(self, item, spider):
    """Convert the scraped fields and queue the item for storage.

    ``update_time`` is parsed with the format ``%Y-%m-%dT%H:%M:%SZ`` into a
    ``datetime``.  For ``commits``, ``branches`` and ``releases`` a value of
    ``None`` becomes ``0``; any other value has its commas removed and is
    converted to ``int``.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.

    Raises:
        ValueError: if the timestamp or a count cannot be parsed.
    """
    item['update_time'] = datetime.strptime(
        item['update_time'], '%Y-%m-%dT%H:%M:%SZ')

    # One loop for all three counters: the copy-pasted branches previously
    # read the misspelled key 'releasese' and raised KeyError.
    for field in ('commits', 'branches', 'releases'):
        raw = item[field]
        item[field] = 0 if raw is None else int(raw.replace(',', ''))

    self.session.add(Repository(**item))
    return item
def process_item(self, item, spider):
    """Convert the scraped fields and queue the item for storage.

    ``update_time`` is parsed with the format ``%Y-%m-%dT%H:%M:%SZ`` into a
    ``datetime``.  ``commits``, ``branches`` and ``releases`` are converted
    to ``int`` after removing every comma; a value of ``None`` is left
    untouched.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.

    Raises:
        ValueError: if the timestamp or a count cannot be parsed.
    """
    item['update_time'] = datetime.strptime(
        item['update_time'], '%Y-%m-%dT%H:%M:%SZ')

    for field in ('commits', 'branches', 'releases'):
        raw = item[field]
        if raw is None:
            continue
        # Remove all separators: joining only the first two comma groups
        # turned "1,234,567" into 1234.
        item[field] = int(raw.replace(',', ''))

    self.session.add(Repository(**item))
    return item
def process_item(self, item, spider):
    """Queue the item for storage without modifying it.

    A ``Repository`` is built from the item's fields (passed as keyword
    arguments) and added to ``self.session``.

    Args:
        item: mapping of scraped fields.
        spider: not used by this method.

    Returns:
        The same ``item`` object.
    """
    row = Repository(**item)
    self.session.add(row)
    return item
def process_item(self, item, spider):
    """Reduce ``update_time`` to a ``date`` and queue the item.

    Only the text before the first ``T`` is kept; it is parsed as
    ``%Y-%m-%d`` and the time-of-day part is discarded.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.
    """
    day_text = item['update_time'].split('T')[0]
    parsed = datetime.strptime(day_text, '%Y-%m-%d')
    item['update_time'] = parsed.date()

    row = Repository(**item)
    self.session.add(row)
    return item
def process_item(self, item, spider):
    """Store ``ShiyanlougithubItem`` instances; pass every item through.

    For a ``ShiyanlougithubItem`` the ``update_time`` value is parsed with
    the format ``%Y-%m-%dT%H:%M:%SZ`` into a ``datetime`` and a
    ``Repository`` built from the item is added to ``self.s``.

    Args:
        item: scraped item; mutated in place when it is handled.
        spider: not used by this method.

    Returns:
        The same ``item`` object.
    """
    # NOTE(review): assumes the parse and the add both belong to the
    # isinstance branch and only the return sits outside it - confirm.
    if not isinstance(item, ShiyanlougithubItem):
        return item

    raw_stamp = item['update_time']
    item['update_time'] = datetime.strptime(raw_stamp, '%Y-%m-%dT%H:%M:%SZ')
    self.s.add(Repository(**item))
    return item
def process_item(self, item, spider):
    """Convert the scraped fields, print the item and queue it for storage.

    ``commits`` has its commas removed and is converted to ``int``;
    ``update_time`` is parsed with the format ``%Y-%m-%dT%H:%M:%SZ`` into a
    ``datetime``.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.

    Raises:
        ValueError: if the count or the timestamp cannot be parsed.
    """
    item['commits'] = int(item['commits'].replace(',', ''))
    item['update_time'] = datetime.strptime(
        item['update_time'], '%Y-%m-%dT%H:%M:%SZ')

    # Debug output, kept exactly as it was.
    print('-------------------------', item)

    self.session.add(Repository(**item))
    # NOTE(review): the signature matches an item-pipeline hook; returning
    # the item (instead of the implicit None) lets a following stage use it.
    return item
def _process_repo_item(self, item):
    """Parse ``update_time`` into a ``datetime`` and queue the item.

    The value is parsed with the format ``%Y-%m-%dT%H:%M:%SZ``; a
    ``Repository`` built from the item is then added to ``self.session``.

    Args:
        item: mapping of scraped fields; mutated in place.

    Returns:
        None.
    """
    raw_stamp = item['update_time']
    item['update_time'] = datetime.strptime(raw_stamp, '%Y-%m-%dT%H:%M:%SZ')

    row = Repository(**item)
    self.session.add(row)
def process_item(self, item, spider):
    """Remove commas from ``commits`` and queue the item for storage.

    The cleaned ``commits`` value stays a string; no numeric conversion is
    performed here.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.
    """
    # str.strip(',') only trims the two ends, so a value such as "1,234"
    # kept its separator; replace() removes commas wherever they occur.
    item['commits'] = item['commits'].replace(',', '')

    self.session.add(Repository(**item))
    return item
def process_item(self, item, spider):
    """Queue the item for storage without modifying it.

    Args:
        item: mapping of scraped fields.
        spider: not used by this method.

    Returns:
        The same ``item`` object.
    """
    # Build a Repository (the model from models.py) from the item's fields
    # and add it to the database session.
    repository = Repository(**item)
    self.session.add(repository)
    return item
def process_item(self, item, spider):
    """Parse ``update_time`` into a ``datetime`` and queue the item.

    The value is parsed with the format ``%Y-%m-%dT%H:%M:%SZ``.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%SZ'
    item['update_time'] = datetime.strptime(item['update_time'], stamp_format)

    row = Repository(**item)
    self.session.add(row)
    return item
def process_item(self, item, spider):
    """Convert the count fields to ``int`` and queue the item for storage.

    ``commits``, ``branches`` and ``releases`` are converted to ``int``;
    string values may contain ``,`` thousands separators (for example
    ``"1,234"``), which are removed first.

    Args:
        item: mapping of scraped fields; mutated in place.
        spider: not used by this method.

    Returns:
        The same ``item`` object after mutation.

    Raises:
        ValueError: if a count cannot be parsed as an integer.
    """
    # Previously only 'commits' tolerated separators; treat all three alike.
    for field in ('commits', 'branches', 'releases'):
        raw = item[field]
        # Non-string values go to int() exactly as before.
        if isinstance(raw, str):
            raw = raw.replace(',', '')
        item[field] = int(raw)

    self.session.add(Repository(**item))
    return item