def commit_records(buf):
    # First separate existing and new records
    identifiers = [r.identifier for r in buf]
    records_to_update = OaiRecord.objects.filter(identifier__in=identifiers)
    identifiers_found = set([r.identifier for r in records_to_update])
    records_to_create = []
    for r in buf:
        if r.identifier not in identifiers_found:
            records_to_create.append(r)

    # Create new records
    try:
        OaiRecord.objects.bulk_create(records_to_create)
    except IntegrityError:
        # No idea why this can actually happen
        for r in records_to_create:
            OaiRecord.objects.get_or_create(
                identifier=r.identifier,
                defaults={
                    'source': r.source,
                    'timestamp': r.timestamp,
                    'format': r.format,
                    'fingerprint': r.fingerprint,
                    'doi': r.doi,
                    'metadata': r.metadata,
                    'last_modified': r.last_modified or datetime.now(),
                })

    # Update existing ones
    if records_to_update:
        bulk_update(records_to_update)
def refresh_submission_details(self):
    '''
    Based on new and existing mappings, upsert the corresponding
    master_object_id_ids and determine which rows are ready to process.
    Since we need a master_location and a master_campaign in order to have
    a successful submission, this method filters the data down to what we
    actually need to process.

    We could be more careful about what gets deleted here: most rows are not
    touched by this re-processing, so the delete and re-insert is wasted work
    for them. The benefit is that it is a clean upsert -- we do not have to
    worry about stale data that should have been blown away hanging around.
    '''
    ss_id_list_to_process, all_ss_ids = [], []

    # find source_submission_ids to process based on location_codes,
    # then get the json of all of the related submissions.
    submission_qs = SourceSubmission.objects\
        .filter(document_id=self.id)

    for submission in submission_qs:
        all_ss_ids.append(submission.id)
        location_id = submission.get_location_id()
        campaign_id = submission.get_campaign_id()
        if location_id > 0:
            ss_id_list_to_process.append(submission.id)
        submission.location_id = location_id
        submission.campaign_id = campaign_id

    if len(submission_qs) > 0:
        bulk_update(submission_qs)

    return ss_id_list_to_process, all_ss_ids
def get_shopping_item(request):
    # delete completed items with old dates
    old_items = get_old_date_items(request, ShoppingItem,
                                   'shopping_item_date__range', completed=True)
    old_items.delete()

    # move uncompleted items with old dates to today
    not_completed_items = get_old_date_items(request, ShoppingItem,
                                             'shopping_item_date__range', completed=False)
    today = datetime.date.today()
    for item in not_completed_items:
        item.shopping_item_date = today
    bulk_update(not_completed_items, update_fields=['shopping_item_date'])

    items = ShoppingItem.objects.filter(user=request.user).order_by(
        "completed", "shopping_item_date", "shop").values()
    item_list = list(items)

    shops = Shop.objects.filter(user=request.user).values()
    shop_list = list(shops)

    pictures = ShoppingItemPicture.objects.filter(user=request.user).values()
    picture_list = list(pictures)

    all_lists = {
        "all_items": item_list,
        "shops": shop_list,
        "pictures": picture_list
    }
    return JsonResponse(all_lists, safe=False)
def migrate_clickmeter_product_links():
    from debra.models import BrandJobPost, Contract

    campaigns = BrandJobPost.objects.filter(
        candidates__contract__tracking_link__isnull=False).distinct()
    total = campaigns.count()

    for n, campaign in enumerate(campaigns, start=1):
        print '* {}/{} -- {} processing'.format(n, total, campaign)
        contracts = Contract.objects.filter(influencerjobmapping__job=campaign)
        if campaign.info_json.get('same_product_url'):
            product_url = campaign.info_json.get('product_url')
            if product_url:
                campaign.product_urls = [product_url]
                campaign.product_urls.append("")
                # campaign._ignore_old = True
                campaign.save()
        else:
            for contract in contracts:
                if contract.product_url:
                    contract.product_urls = [contract.product_url]
                    contract.product_urls.append("")
                    contract.product_tracking_links = [contract.tracking_link]
                    contract._ignore_old = True
            bulk_update(
                contracts,
                update_fields=['product_urls', 'product_tracking_links'])
def cleanup_paper_researcher_ids():
    """
    Ensures that all researcher_ids in Papers link to actual researchers
    """
    researcher_ids = set(Researcher.objects.all().values_list('id', flat=True))
    bs = 1000
    curid = 0
    found = True
    while found:
        found = False
        batch = []
        for p in Paper.objects.filter(id__gt=curid).order_by('id')[:bs]:
            curid = p.id
            found = True
            modified = False
            for i, author in enumerate(p.authors_list):
                rid = author['researcher_id']
                if rid is not None and rid not in researcher_ids:
                    p.authors_list[i]['researcher_id'] = None
                    modified = True
            if modified:
                batch.append(p)
        if batch:
            logger.info("Updating {} papers, from {}".format(len(batch), curid))
            bulk_update(batch)
        else:
            logger.info(curid)
def start(self):
    Base().connection()
    packet = NormalizePublication.objects.filter(
        title_hashes={}).order_by('pubdate')[:self.publications_count]
    # fetch all the words
    vocabulary = self.__get_all_words(packet)
    # pull in synonyms
    self.__replace_synonims(vocabulary)
    result = []
    for line in packet:
        result_line = {}
        title = self.__hash_list(line.title.split(' '))
        text = self.__hash_list(line.text.split(' '))
        result_line['title_hash'] = title
        result_line['text_hash'] = text
        # attach word numbers to the title
        result_line['title_words'] = self.__link_numbers(
            line.title_words, result_line['title_hash'], vocabulary)
        # collect all the words
        result_line['title_words'] = self.__append_n_sort(
            result_line['title_words'])
        # build the list of words
        result_line['title_words'] = self.__make_list_with_parents(
            result_line['title_words'])
        # attach word numbers to the text
        result_line['text_words'] = self.__link_numbers(
            line.text_words, result_line['text_hash'], vocabulary)
        # collect all the words
        result_line['text_words'] = self.__append_n_sort(
            result_line['text_words'])
        # build the list of words
        result_line['text_words'] = self.__make_list_with_parents(
            result_line['text_words'])
        result.append({
            'id': line.id,
            'title_hashes': result_line['title_words'],
            'text_hashes': result_line['text_words'],
        })
    for line in packet:
        for result_line in result:
            if line.id == result_line['id']:
                line.title_hashes = result_line['title_hashes']
                line.text_hashes = result_line['text_hashes']
    bulk_update(packet)
    self.save_status(len(packet))
def update_stats(translated_resources, locale):
    """Update stats on a list of TranslatedResource."""
    projects = set()
    for translated_resource in translated_resources:
        projects.add(translated_resource.resource.project)
        translated_resource.calculate_stats(save=False)

    bulk_update(
        translated_resources,
        update_fields=[
            "total_strings",
            "approved_strings",
            "fuzzy_strings",
            "strings_with_errors",
            "strings_with_warnings",
            "unreviewed_strings",
        ],
    )

    locale.aggregate_stats()
    for project in projects:
        project.aggregate_stats()
        ProjectLocale.objects.get(locale=locale, project=project).aggregate_stats()
def commit_records(buf):
    # First separate existing and new records
    identifiers = [r.identifier for r in buf]
    records_found = OaiRecord.objects.filter(
        identifier__in=identifiers).values_list('identifier', 'id')
    identifiers_found = {identifier: id for identifier, id in records_found}
    records_to_create = []
    records_to_update = []
    for r in buf:
        id = identifiers_found.get(r.identifier)
        if id:
            r.id = id
            r.last_modified = make_aware(datetime.now(), UTC())
            records_to_update.append(r)
        else:
            records_to_create.append(r)

    # Create new records
    try:
        OaiRecord.objects.bulk_create(records_to_create)
    except IntegrityError:
        # No idea why this can actually happen
        for r in records_to_create:
            OaiRecord.objects.get_or_create(
                identifier=r.identifier,
                defaults={
                    'source': r.source,
                    'timestamp': r.timestamp,
                    'format': r.format,
                    'fingerprint': r.fingerprint,
                    'doi': r.doi,
                    'metadata': r.metadata,
                    'last_modified': r.last_modified or datetime.now(),
                })

    # Update existing ones
    if records_to_update:
        bulk_update(records_to_update)
def test_gen_char_task():
    columnTaskList = ColumnTask.objects.filter(status=TaskStatus.NOT_GOT)
    columntasks = []
    pagerects = []
    charrects = []
    for columnTask in columnTaskList:
        if (randint(0, 9) / 2) == 0:  # randomly simulate the column task being completed
            columnTask.status = TaskStatus.COMPLETED
            columntasks.append(columnTask)
            pagerectList = columnTask.pagerects.all()
            for pageRect in pagerectList:
                pageRect.op = OpStatus.CHANGED  # marks that the page has been proofread.
                pageRect.status = PageRectStatus.CUT_COLUMN_COMPLETED
                pagerects.append(pageRect)
                # iterate over all column data of this page and run the
                # character-cutting algorithm on each column
                columnrectList = ColumnRect.objects.filter(
                    pagerect=pageRect.id)
                for columnRect in columnrectList:
                    # simulate the algorithm generating the character-cut data for
                    # this column: assume each character is 50 high and compute
                    # how many characters to cut.
                    charCount = int(columnRect.h / TEST_CHAR_HEIGHT)
                    for no in range(0, charCount):
                        char = CharRect.create_Rect(
                            columnRect, no, columnRect.x,
                            columnRect.y + no * TEST_CHAR_HEIGHT,
                            randint(35, TEST_COLUMN_WIDTH),
                            randint(35, TEST_CHAR_HEIGHT))
                        charrects.append(char)
    CharRect.objects.bulk_create(charrects)
    bulk_update(pagerects)
    bulk_update(columntasks)
def get_ips(request):
    request_id = str(uuid4())
    # Assign 987 IPs for this worker so that these IPs aren't assigned to
    # other workers. https://github.com/aykut/django-bulk-update/issues/46
    # says that SQLite3 will support up to 47 record updates per batch.
    for _ in range(0, 21):
        ids = list(
            IPv4Address.objects.filter(allocated_for_request=None).values_list(
                'id', flat=True)[:47])
        ips = IPv4Address.objects.filter(pk__in=ids)
        for ip in ips:
            ip.allocated_for_request = request_id
        bulk_update(ips, update_fields=['allocated_for_request'])
    ips = ','.join([
        ip.address
        for ip in IPv4Address.objects.filter(allocated_for_request=request_id)
    ])
    if not ips:
        return HttpResponse('END')  # No more IPs to work with
    return HttpResponse(ips)
def test_gen_column_task():
    pageTaskList = PageTask.objects.filter(status=TaskStatus.NOT_GOT)
    pagetasks = []
    pagerects = []
    columnrects = []
    for pageTask in pageTaskList:
        if (randint(0, 9) / 2) == 0:  # randomly simulate the page task being completed
            pageTask.status = TaskStatus.COMPLETED
            pagetasks.append(pageTask)
            pagerectList = pageTask.pagerects.all()
            for pageRect in pagerectList:
                pageRect.op = OpStatus.CHANGED  # marks that the page has been proofread.
                pageRect.status = PageRectStatus.CUT_PAGE_COMPLETED
                pagerects.append(pageRect)
                # simulate the algorithm generating the column data for this page.
                columnColunt = randint(3, 10)
                for no in range(0, columnColunt):
                    column = ColumnRect.create_Rect(pageRect, no,
                                                    randint(10, 800),
                                                    randint(10, 600),
                                                    TEST_COLUMN_WIDTH,
                                                    randint(300, 600))
                    columnrects.append(column)
    ColumnRect.objects.bulk_create(columnrects)
    bulk_update(pagerects)
    bulk_update(pagetasks)
def random_records():
    lst = list(GroupRecord.objects.all())
    for r in lst:
        r.last_name = rand_str(random.randint(2, 30))
        r.first_name = rand_str(random.randint(2, 30))
        r.middle_name = rand_str(random.randint(2, 30))
    bulk_update(lst)
def update_best_match(self, cpgm, celery_obj=None, current=None):
    cpgqs = CPeakGroup.objects.filter(
        cpeakgroupmeta=cpgm,
    ).values('id').annotate(
        Max('cannotation__weighted_score'),
        Max('cannotation__compound__name'))

    cpgs = []
    for i, cpgq in enumerate(cpgqs):
        if i % 200 == 0:
            print(i)
            bulk_update(cpgs)
            cpgs = []
            if celery_obj:
                celery_obj.update_state(
                    state='RUNNING',
                    meta={
                        'current': current,
                        'total': 100,
                        'status': 'Update best match {}'.format(i)
                    })
        cpg = CPeakGroup.objects.get(pk=cpgq['id'])
        cpg.best_annotation = CAnnotation.objects.filter(
            cpeakgroup_id=cpg.id,
            weighted_score=cpgq['cannotation__weighted_score__max']
        )[0] if cpgq['cannotation__compound__name__max'] else None
        cpg.best_score = cpgq['cannotation__weighted_score__max']
        cpgs.append(cpg)

    # flush any objects left in the final partial batch
    if cpgs:
        bulk_update(cpgs)
def merge_records_by_hypostases(self, other_records, save=True):
    """Unite all records and related hypostases around this record's person.

    This record's person becomes the person referenced by the other records
    and their related hypostases. All empty persons (not referenced by any
    hypostasis) are removed. No checks on the group are made, assuming those
    checks were done more efficiently when this group was created."""
    hypostases_for_update = []
    records_for_update = []
    persons_to_delete = set()
    for record in other_records:
        previous_person = record.person
        if previous_person == self.person:
            continue
        persons_to_delete.add(previous_person)
        record.hypostasis.person = self.person
        record.person = self.person
        hypostases_for_update.append(record.hypostasis)
        records_for_update.append(record)
    if self.person in persons_to_delete:
        persons_to_delete.remove(self.person)
    for person in list(persons_to_delete):
        for hypostasis in person.hypostasis_set.all():
            if hypostasis not in hypostases_for_update:
                persons_to_delete.remove(person)
                break
    if save:
        bulk_update(records_for_update, update_fields=['person'])
        bulk_update(hypostases_for_update, update_fields=['person'])
        for person in persons_to_delete:
            person.delete()
    else:
        return records_for_update, hypostases_for_update, persons_to_delete
def unmake(self, search=True, group_dict=None, **kwargs):
    """Split all records in this group and delete it.

    Can't be done for partially merged groups."""
    if self.person is not None:
        raise GroupError(
            "Can't split group, if its part was previously merged")
    records = list(
        self.grouprecord_set.all()) if not group_dict else group_dict[self]
    for rec1, rec2 in combinations(records, 2):
        rec1.forbidden_group_records.add(rec2)
        rec2.forbidden_group_records.add(rec1)
    if group_dict:
        group_dict.pop(self)
    if search:
        if group_dict is None:
            group_dict = Group.get_dictionary()
        for record in records:
            record.group = None
            new_group = record.seek_for_group(group_dict=group_dict, **kwargs)
            if new_group:
                record.group = new_group
            else:
                new_group = record.seek_to_make_new_group(**kwargs)
                if new_group:
                    record.group = new_group
            if new_group:
                group_dict[new_group].append(record)
    bulk_update(records)
    self.delete()
def __update_contact_import(self, contacts_to_update):
    if contacts_to_update is None or len(contacts_to_update) == 0:
        return 0
    total_updated = len(contacts_to_update)
    contacts_to_update = sorted(contacts_to_update,
                                key=lambda contact: contact['contact_id'])
    contacts_id = [contact['contact_id'] for contact in contacts_to_update]
    contacts = ModelContact.objects.filter(
        account=self.__account,
        contact_id__in=contacts_id).order_by('contact_id')
    update_contacts = False
    for old_contact, new_contact in zip(contacts, contacts_to_update):
        first_name = new_contact.get('first_name')
        last_name = new_contact.get('last_name')
        if old_contact.first_name != first_name:
            old_contact.first_name = first_name
            update_contacts = True
        if old_contact.last_name != last_name:
            old_contact.last_name = last_name
            update_contacts = True
    if not update_contacts:
        return 0
    bulk_update(contacts, update_fields=['first_name', 'last_name'])
    return total_updated
def sync_contacts(self, update_existing=True):
    numbers = []
    contacts = ModelContact.objects.filter(account=self.__account)
    for c in contacts:
        numbers.append(c.contact_id)
    result = self.__sync_contacts(numbers)
    result = Contacts.__adjust_sync_result(result)
    status_code = result.get('code')
    if status_code is None or status_code[0] != '2':
        return result
    if update_existing is False:
        return result
    # call bulk_update only if some records have changed
    update_existing = False
    for c in contacts:
        exists = c.contact_id not in result['non_existing']
        if exists != c.exists:
            c.exists = exists
            update_existing = True
    if update_existing:
        bulk_update(contacts, update_fields=['exists'])
    return result
def fix_stats(self):
    """
    Recalculate any broken stats when sync task is finished.
    This is a temporary fix for bug 1310668.
    """
    if not self.finished:
        return

    # total_strings mismatch between TranslatedResource & Resource
    translated_resources = []
    for t in TranslatedResource.objects.exclude(
            total_strings=F('resource__total_strings')).select_related('resource'):
        t.total_strings = t.resource.total_strings
        translated_resources.append(t)

    bulk_update(translated_resources, update_fields=['total_strings'])

    # total_strings mismatch in ProjectLocales within the same project
    for p in Project.objects.available():
        if ProjectLocale.objects.filter(
                project=p).values("total_strings").distinct().count() > 1:
            for pl in ProjectLocale.objects.filter(project=p):
                pl.aggregate_stats()

    # translated + suggested + fuzzy > total in TranslatedResource
    for t in (TranslatedResource.objects.filter(
            resource__project__disabled=False).annotate(total=Sum(
                F('approved_strings') + F('translated_strings') +
                F('fuzzy_strings'))).filter(total__gt=F('total_strings'))):
        t.calculate_stats()

    log.info("Sync complete.")
def distribute_records_among_existing_groups(**kwargs):
    print("Starting distribution among existing groups")
    print("Extracting records")
    unresolved_records = list(GroupRecord.objects.filter(group__isnull=True))
    print("Making dictionary of groups")
    group_dict = Group.get_dictionary()
    if len(group_dict) == 0:
        print("No groups found. Finishing")
        return
    records_to_update = []
    groups_to_update = set()
    print("Handling records")
    ttl = len(unresolved_records)
    cntr = 0
    now = time()
    for record in unresolved_records:
        cntr += 1
        if cntr % 100 == 0:
            print("{} of {} records handled {}".format(cntr, ttl, time() - now))
            now = time()
        suitable_group = record.seek_for_group(group_dict, **kwargs)
        if suitable_group is not None:
            record.group = suitable_group
            records_to_update.append(record)
            groups_to_update.add(suitable_group)
    print("Have {0} records to update".format(len(records_to_update)))
    if len(records_to_update) > 0:
        bulk_update(records_to_update, update_fields=['group'])
    print("Have {0} groups to update".format(len(groups_to_update)))
    if len(groups_to_update) > 0:
        mark_inconsistency(groups=list(groups_to_update))
    print("Distribution among existing groups: done")
def bulk_update_entities(self):
    if len(self.entities_to_update) > 0:
        bulk_update(self.entities_to_update, update_fields=[
            'resource', 'string', 'string_plural', 'key', 'comment',
            'order', 'source'
        ])
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and cached
    in the Conference.num_submissions field.
    """
    conferences = Conference.find(Q('is_meeting', 'ne', False))
    # TODO: Revisit this loop, there has to be a way to optimize it
    for conf in conferences:
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        projects = set()

        tags = Tag.find(Q('system', 'eq', False) &
                        Q('name', 'iexact', conf.endpoint.lower())).values_list('pk', flat=True)
        nodes = Node.find(
            Q('tags', 'in', tags) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'ne', True)
        ).include('guids')
        projects.update(list(nodes))

        num_submissions = len(projects)
        # Cache the number of submissions
        conf.num_submissions = num_submissions
    bulk_update(conferences, update_fields=['num_submissions'])
    return {'success': True}
def rekey_pagecounters(state, schema):
    AbstractNode = state.get_model('osf', 'AbstractNode')
    Guid = state.get_model('osf', 'Guid')
    Preprint = state.get_model('osf', 'Preprint')
    PageCounter = state.get_model('osf', 'PageCounter')
    nct = ContentType.objects.get_for_model(AbstractNode).id
    pct = ContentType.objects.get_for_model(Preprint).id
    preprints = Preprint.objects.select_related('node').exclude(
        primary_file_id__isnull=True).exclude(node_id__isnull=True)
    progress_bar = tqdm(total=preprints.count() or 1)
    batch = []
    for i, preprint in enumerate(preprints, 1):
        node_id = Guid.objects.get(content_type=nct, object_id=preprint.node_id)._id
        file_id = preprint.primary_file._id
        if node_id and file_id:
            preprint_id = Guid.objects.filter(
                content_type=pct,
                object_id=preprint.id).values_list('_id', flat=True).first()
            if not preprint_id:
                assert False
            for page_counter in PageCounter.objects.filter(
                    _id__startswith='download:{}:{}'.format(node_id, file_id)):
                page_counter._id = page_counter._id.replace(node_id, preprint_id)
                batch.append(page_counter)
        progress_bar.update(i)
    bulk_update(batch, update_fields=['_id'], batch_size=10000)
    progress_bar.close()
def merge_records_by_persons(self, other_records, save=True):
    """This merge also updates related records from the other groups
    if they reference the same person."""
    persons_to_delete = set()
    hypostases_for_update = []
    records_for_update = []
    new_group = self.group
    new_person = self.person
    for record in other_records:
        persons_to_delete.add(record.person)
    if new_person in persons_to_delete:
        persons_to_delete.remove(new_person)
    for person in persons_to_delete:
        hypostases_for_update.extend(person.hypostasis_set.all())
        records_for_update.extend(person.grouprecord_set.all())
    for record in records_for_update:
        record.group = new_group
        record.person = new_person
    for hypostasis in hypostases_for_update:
        hypostasis.person = new_person
    if save:
        bulk_update(hypostases_for_update, update_fields=['person'])
        bulk_update(records_for_update, update_fields=['person', 'group'])
        for person in persons_to_delete:
            person.delete()
    else:
        return records_for_update, hypostases_for_update, persons_to_delete
def refresh_submission_details(self):
    '''
    Based on new and existing mappings, upsert the corresponding
    master_object_id_ids and determine which rows are ready to process.
    Since we need a master_location and a master_campaign in order to have
    a successful submission, this method filters the data down to what we
    actually need to process.

    We could be more careful about what gets deleted here: most rows are not
    touched by this re-processing, so the delete and re-insert is wasted work
    for them. The benefit is that it is a clean upsert -- we do not have to
    worry about stale data that should have been blown away hanging around.
    '''
    ss_id_list_to_process, all_ss_ids = [], []

    ## find source_submission_ids to process based on location_codes,
    ## then get the json of all of the related submissions.
    submission_qs = SourceSubmission.objects\
        .filter(document_id=self.document_id)

    for submission in submission_qs:
        all_ss_ids.append(submission.id)
        location_id = submission.get_location_id()
        campaign_id = submission.get_campaign_id()
        if location_id > 0:
            ss_id_list_to_process.append(submission.id)
        submission.location_id = location_id
        submission.campaign_id = campaign_id

    if len(submission_qs) > 0:
        bulk_update(submission_qs)

    return ss_id_list_to_process, all_ss_ids
def update_in_bulk(cls, articles):
    from bulk_update.helper import bulk_update
    bulk_update(
        articles,
        exclude_fields=['id', 'shop_code', 'shop_category'],
        batch_size=45  # Theoretical maximum for SQLite databases is 999
    )
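# Illustrative sketch (not from the original sources): the batch_size=45 above
# is tied to SQLite's default limit of 999 bound variables per statement. One
# hedged way to pick a safe batch size for an arbitrary model is to divide that
# limit by the number of columns being written; the helper below and the name
# SQLITE_MAX_VARS are assumptions made for this example only.
SQLITE_MAX_VARS = 999  # default SQLITE_MAX_VARIABLE_NUMBER

def safe_batch_size(model, update_fields=None):
    # count the columns each row contributes to the UPDATE statement
    num_columns = len(update_fields) if update_fields else len(model._meta.concrete_fields)
    # leave headroom for the primary keys referenced in the WHERE clause
    return max(1, (SQLITE_MAX_VARS - 1) // (num_columns + 1))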
def run(self, batch_size=None):
    count_instances = 0
    count_fields = 0
    instances = self.get_query_set()
    for model_instance in instances:
        for field_name, replacer in self.attributes:
            field_value = getattr(model_instance, field_name)
            if callable(replacer):
                replaced_value = replacer(instance=model_instance,
                                          field_value=field_value)
            elif isinstance(replacer, basestring):
                replaced_value = replacer
            else:
                raise TypeError("Replacers need to be callables or Strings!")
            setattr(model_instance, field_name, replaced_value)
            count_fields += 1
        count_instances += 1
    batch_size = DEFAULT_CHUNK_SIZE if batch_size is None else int(batch_size)
    bulk_update(
        instances,
        update_fields=[attrs[0] for attrs in self.attributes],
        batch_size=batch_size,
    )
    return len(self.attributes), count_instances, count_fields
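# Illustrative sketch (not from the original sources): shows how the
# (field_name, replacer) pairs consumed by run() above could be configured.
# BaseAnonymizer, Customer and mask_email are hypothetical names assumed for
# this example; replacers are either callables or plain strings, as run() expects.
def mask_email(instance, field_value):
    # callable replacers receive the instance and the current field value
    return 'user{}@example.invalid'.format(instance.pk)

class CustomerAnonymizer(BaseAnonymizer):
    attributes = [
        ('email', mask_email),  # callable replacer
        ('notes', ''),          # plain string replacer
    ]

    def get_query_set(self):
        return Customer.objects.all()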
def refresh_daily_stats():
    start = time.time()
    logger.info("Starting: Getting all listed stocks for NASDAQ")
    print("Starting: Getting all listed stocks for NASDAQ")
    UpdateSymbol.get_listed_stocks()
    end = time.time()
    logger.info("Finished: Getting all listed stocks for NASDAQ in: %s" % print_timer(start, end))
    print("Finished: Getting all listed stocks for NASDAQ in: %s" % print_timer(start, end))

    symbols = Symbol.objects.filter(listed=True)
    update_symbols = list()

    logger.info("Starting: Updating Stock History & Stats")
    print("Starting: Updating Stock History & Stats")
    start = time.time()
    for symbol in symbols:
        symbol = UpdateSymbol.get_symbol_history(symbol)
        symbol = SymbolCalculations.get_average_percent_change(symbol, day_count=20)
        update_symbols.append(symbol)

    batch_size = 10  # SQLite Limitation is low...
    bulk_update(update_symbols, batch_size=batch_size)
    end = time.time()
    logger.info("Finished: Updating Stock History & Stats in: %s" % print_timer(start, end))
    print("Finished: Updating Stock History & Stats in: %s" % print_timer(start, end))
def fix_stats(self):
    """
    Recalculate any broken stats when sync task is finished.
    This is a temporary fix for bug 1310668.
    """
    if not self.finished:
        return

    # total_strings mismatch between TranslatedResource & Resource
    translated_resources = []
    tr_source = TranslatedResource.objects.exclude(
        total_strings=F("resource__total_strings")
    ).select_related("resource")
    for t in tr_source:
        t.total_strings = t.resource.total_strings
        translated_resources.append(t)
        log.info(
            "Fix stats: total_strings mismatch for {resource}, {locale}.".format(
                resource=t.resource, locale=t.locale.code
            )
        )

    bulk_update(translated_resources, update_fields=["total_strings"])

    # total_strings mismatch in ProjectLocales within the same project
    for p in Project.objects.available():
        count = (
            ProjectLocale.objects.filter(project=p)
            .values("total_strings")
            .distinct()
            .count()
        )
        if count > 1:
            for pl in ProjectLocale.objects.filter(project=p):
                pl.aggregate_stats()

    # approved + fuzzy + errors + warnings > total in TranslatedResource
    for t in (
        TranslatedResource.objects.filter(
            resource__project__disabled=False,
            resource__project__sync_disabled=False,
        )
        .annotate(
            total=Sum(
                F("approved_strings")
                + F("fuzzy_strings")
                + F("strings_with_errors")
                + F("strings_with_warnings")
            )
        )
        .filter(total__gt=F("total_strings"))
    ):
        log.info(
            "Fix stats: total_strings overflow for {resource}, {locale}.".format(
                resource=t.resource, locale=t.locale.code
            )
        )
        t.calculate_stats()

    log.info("Sync complete.")
def bulk_update(self, objects, fields=None, exclude=None, batch_size=None,
                using='default', pk_field='pk'):
    """
    Update all objects in the given list. Optionally, a list of fields
    to be updated can also be passed.

    Args:
        objects (sequence): List of model instances.
        fields: Optional list of field names to update.
        exclude: Optional list of field names to leave untouched.
        batch_size (int): Maximum size of each batch sent for update.
    """
    bulk_update(objects, meta=self.model._meta, update_fields=fields,
                exclude_fields=exclude, using=using, batch_size=batch_size,
                pk_field=pk_field)
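# Illustrative usage sketch (not from the original sources): assumes the
# bulk_update method above lives on a custom manager attached to a Book model
# with an on_sale field; the model and field names are made up for the example.
books = list(Book.objects.filter(on_sale=True))
for book in books:
    book.on_sale = False
# one batched UPDATE per batch_size chunk, touching only the on_sale column
Book.objects.bulk_update(books, fields=['on_sale'], batch_size=500)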
def add_datetime_to_audio_files(apps, schema_editor):
    db_alias = schema_editor.connection.alias
    audio_file_model = apps.get_model('koe', 'AudioFile')
    audio_files = audio_file_model.objects.using(db_alias).all()

    slashed_url = os.path.join(settings.MEDIA_URL, 'audio/wav/{}', '{}.wav')
    unslashed_url = slashed_url[1:]
    wav_path_template = os.path.join(settings.BASE_DIR, unslashed_url)

    sys.stdout.write('\n')
    sys.stdout.write('\tAdding timestamp to {} AudioFiles...'.format(len(audio_files)))

    for audio_file in audio_files:
        if audio_file.original is None:
            database_id = audio_file.database.id
            file_name = audio_file.name
        else:
            database_id = audio_file.original.database.id
            file_name = audio_file.original.name

        file_path = wav_path_template.format(database_id, file_name)
        if os.path.isfile(file_path):
            last_modif_timestamp = os.path.getmtime(file_path)
            last_modif_datetime = datetime.datetime.utcfromtimestamp(last_modif_timestamp)
        else:
            last_modif_datetime = datetime.datetime.utcfromtimestamp(0)

        audio_file.added = pytz.utc.localize(last_modif_datetime)

    bulk_update(audio_files, update_fields=['added'], batch_size=10000)
def geocode_rows(upload=None):
    retry = False
    addresses = Location.objects.filter(is_geocoded=1)
    if upload is not None:
        addresses = addresses.filter(upload_id=upload)
    for address in addresses:
        gecode_url = GEOCODE_URL.format(address.address, API_KEY)
        response = requests.get(gecode_url).json()
        if response['status'] == 'OK':
            results = response['results']
            if len(results) > 0:
                result = results[0]
                lat = result['geometry']['location']['lat']
                lng = result['geometry']['location']['lng']
            else:
                lat = lng = ''
            address.lat_long = str(lat) + ',' + str(lng)
            address.is_geocoded = 2
        elif response['status'] == 'OVER_QUERY_LIMIT':
            retry = True
            break
    bulk_update(addresses, update_fields=['is_geocoded', 'lat_long'])
    if not retry:
        upload_obj = Upload.objects.get(pk=upload)
        upload_obj.geocoding_status = 2
        upload_obj.save()
    return retry
def import_margin(parameter):
    csv_path = os.path.join(settings.KABUPLUS_DIRECTORY, 'jsf-balance-data',
                            'daily', 'jsf-balance-data.csv')
    created = datetime(*time.localtime(os.path.getctime(csv_path))[:3])

    updated_security_margins = []
    security_codes = {c.code: c for c in SecurityCode.objects.all()}

    dataset = pandas.read_csv(csv_path, encoding="shift_jis")
    for index, row in dataset.iterrows():
        code = str(row[0])
        market = str(row[2])
        jsf_margin_type = str(row[5])
        jsf_buy_new = parse_integer(row[6])
        jsf_buy_refund = parse_integer(row[7])
        jsf_buy_balance = parse_integer(row[8])
        jsf_sell_new = parse_integer(row[9])
        jsf_sell_refund = parse_integer(row[10])
        jsf_sell_balance = parse_integer(row[11])
        jsf_net_balance = parse_integer(row[12])
        jsf_net_change = parse_integer(row[13])

        # Discard rows for markets other than the TSE (e.g. Nagoya), since they are not needed
        if market != u'東証':
            continue

        security_code = security_codes.get(code)
        if security_code is None:
            logging.warning('SecurityCode does not exist: code=' + code)
            continue

        try:
            security_margin = SecurityMargin.objects.get(security_code__code=code, date=created)
            security_margin.jsf_margin_type = jsf_margin_type
            security_margin.jsf_buy_new = jsf_buy_new
            security_margin.jsf_buy_refund = jsf_buy_refund
            security_margin.jsf_buy_balance = jsf_buy_balance
            security_margin.jsf_sell_new = jsf_sell_new
            security_margin.jsf_sell_refund = jsf_sell_refund
            security_margin.jsf_sell_balance = jsf_sell_balance
            security_margin.jsf_net_balance = jsf_net_balance
            security_margin.jsf_net_change = jsf_net_change
            updated_security_margins.append(security_margin)
        except SecurityMargin.DoesNotExist:
            security_code = security_codes.get(code)
            security_margin = SecurityMargin(
                security_code=security_code,
                date=created,
                jsf_margin_type=jsf_margin_type,
                jsf_buy_new=jsf_buy_new,
                jsf_buy_refund=jsf_buy_refund,
                jsf_buy_balance=jsf_buy_balance,
                jsf_sell_new=jsf_sell_new,
                jsf_sell_refund=jsf_sell_refund,
                jsf_sell_balance=jsf_sell_balance,
                jsf_net_balance=jsf_net_balance,
                jsf_net_change=jsf_net_change,
            )
            security_margin.security_code.save()
            security_margin.save()

    bulk_update(updated_security_margins)
def bulk_update_translations(self):
    if len(self.translations_to_update) > 0:
        bulk_update(self.translations_to_update.values(), update_fields=[
            'entity', 'locale', 'string', 'plural_form', 'approved',
            'approved_user_id', 'approved_date', 'rejected', 'fuzzy',
            'extra'
        ])
def delete_application_medical_info(self):
    """
    Delete all medical info saved on ``GeneralApplications``
    """
    applications = Application.objects.filter(trips_year=self.curr_year)
    for app in applications:
        app.clear_medical_info()
    bulk_update(applications, batch_size=SQLITE_BATCH_LIMIT)
def quote_folder_paths(state, schema):
    try:
        NodeSettings = state.get_model('addons_googledrive', 'nodesettings')
        targets = NodeSettings.objects.filter(folder_path__isnull=False)
    except LookupError:
        return
    for obj in targets:
        obj.folder_path = quote(obj.folder_path.encode('utf-8'))
    bulk_update(targets, update_fields=['folder_path'])
def run(self, profile="GO"): log.debug("Run: %s profile" % profile) i = 0 (N, z) = self._proteinid.shape (n,) = where(self._annotation == 1)[0].shape pvalues= [] enrich = [] for id in self._profileid: check = where(self._profile[:, i] == 1)[0] check = asarray(check).ravel() (K,) = check.shape function = asarray(self._profile[:, i]) function = function.ravel() overlap = function + self._annotation (k,) = where(overlap == 2)[0].shape if k > 0: pvalues.append(self.hypergeometric(N, K, n, k)) enrich.append(Enrichment(experiment_id=self._experimentid, Profile=profile, \ ProfileID=int(self._profileid[i]), Pvalue= self.hypergeometric(N, K, n, k))) else: enrich.append(Enrichment(experiment_id=self._experimentid, Profile=profile, \ ProfileID=int(self._profileid[i]), Pvalue= 1.0)) i += 1 if Enrichment.objects.filter(experiment_id=self._experimentid, Profile=profile).exists(): enrichment = Enrichment.objects.filter(experiment_id=self._experimentid, Profile=profile).order_by("Pvalue") pvalues = [] for enrich in enrichment: pvalues.append(enrich.Pvalue) pvaluesbh = self.BH(pvalues) i = 0 for enrich in enrichment: enrich.BHPvalue = pvaluesbh[i] i += 1 bulk_update(enrichment, batch_size=500) else: Enrichment.objects.bulk_create(enrich, batch_size=500) enrichment = Enrichment.objects.filter(experiment_id=self._experimentid, Profile=profile).order_by("Pvalue") pvalues = [] for enrich in enrichment: pvalues.append(enrich.Pvalue) pvaluesbh = self.BH(pvalues) i = 0 for enrich in enrichment: enrich.BHPvalue = pvaluesbh[i] i += 1 bulk_update(enrichment, batch_size=500)
def update_article(self, article_id_arr):
    """This function updates the score of the already stored articles."""
    try:
        article_qs = Article.objects.filter(article_id__in=article_id_arr)
        for article in article_qs:
            article.upvote_count = self.fetch_article(article.article_id)['score']
        bulk_update(article_qs)
    except Exception:
        pass
def mark_all_unread(self, request, pk=None):
    try:
        alerts = Alert.objects.filter(user=request.user)
    except Alert.DoesNotExist:
        return HttpResponse(status=404)
    for alert in alerts:
        alert.view = False
    bulk_update(alerts, update_fields=['view'])
    return Response(status=status.HTTP_200_OK)
def remove_locale_data(apps, schema_editor):
    Locale = apps.get_model('base', 'Locale')
    locales = Locale.objects.all()

    for l in locales:
        l.script = 'Latin'
        l.direction = 'ltr'
        l.population = 0

    if locales:
        bulk_update(locales)
def base_scenario_totals():
    for obj in base_section_qs.iterator():
        obj.stage_0 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_0')).values()[0]), 2)
        obj.stage_1 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_1')).values()[0]), 2)
        obj.stage_2 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_2')).values()[0]), 2)
        obj.stage_3 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_3')).values()[0]), 2)
        obj.stage_4 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_4')).values()[0]), 2)
        obj.stage_5 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_5')).values()[0]), 2)
        obj.stage_6 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_6')).values()[0]), 2)
        obj.stage_7 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_7')).values()[0]), 2)
        obj.stage_8 = round(D(base_section_qst.aggregate(Sum('commodity_data__stage_8')).values()[0]), 2)
    bulk_update(base_section_qs)
def sync(self, state, response, fs_path=None, pootle_path=None, update="all"): """ Synchronize all staged and non-conflicting files and Stores, and push changes upstream if required. :param fs_path: FS path glob to filter translations :param pootle_path: Pootle path glob to filter translations :returns response: Where ``response`` is an instance of self.respose_class """ self.sync_rm( state, response, fs_path=fs_path, pootle_path=pootle_path) if update in ["all", "pootle"]: self.sync_merge( state, response, fs_path=fs_path, pootle_path=pootle_path, update=update) self.sync_pull( state, response, fs_path=fs_path, pootle_path=pootle_path) if update in ["all", "fs"]: self.sync_push( state, response, fs_path=fs_path, pootle_path=pootle_path) self.push(response) sync_types = [ "pushed_to_fs", "pulled_to_pootle", "merged_from_pootle", "merged_from_fs"] fs_to_update = {} file_hashes = state.resources.file_hashes pootle_revisions = state.resources.pootle_revisions for sync_type in sync_types: if sync_type in response: for response_item in response.completed(sync_type): store_fs = response_item.store_fs last_sync_revision = None if store_fs.store_id in pootle_revisions: last_sync_revision = pootle_revisions[store_fs.store_id] file_hash = file_hashes.get( store_fs.pootle_path, store_fs.file.latest_hash) store_fs.file.on_sync( file_hash, last_sync_revision, save=False) fs_to_update[store_fs.id] = store_fs if fs_to_update: bulk_update( fs_to_update.values(), update_fields=[ "last_sync_revision", "last_sync_hash", "resolve_conflict", "staged_for_merge"]) if response.made_changes: self.expire_sync_cache() return response
def test_bulk_update_objects(self, Factory, django_assert_num_queries):
    objects = []
    ids = range(0, 5)
    for id in ids:
        objects.append(Factory())
    try:
        dtfield = [x.name for x in objects[0]._meta.get_fields()
                   if isinstance(x, DateTimeField)][0]
    except IndexError:
        pytest.skip('Thing doesn\'t have a DateTimeField')
    for obj in objects:
        setattr(obj, dtfield, timezone.now())
    with django_assert_num_queries(1):
        bulk_update(objects)
def bulk_update_translations(self):
    if len(self.translations_to_update) > 0:
        bulk_update(self.translations_to_update, update_fields=[
            'entity', 'locale', 'string', 'plural_form', 'approved',
            'approved_user_id', 'approved_date', 'fuzzy', 'extra'
        ])
def unescape_quotes(apps, schema_editor):
    Translation = apps.get_model('base', 'Translation')
    translations_to_update = []

    translations = Translation.objects.filter(
        entity__resource__path__contains="mobile/android/base"
    )

    for translation in translations.filter(
        Q(string__contains="\\u0022") | Q(string__contains="\\u0027")
    ):
        translation.string = translation.string.replace('\\u0022', '"').replace('\\u0027', "'")
        translations_to_update.append(translation)

    bulk_update(translations_to_update, update_fields=['string'])
def update_comments_viewed_timestamp(node, current_wiki_guid, desired_wiki_object):
    """Replace the current_wiki_object keys in the comments_viewed_timestamp dict
    with the desired wiki_object_id
    """
    users_pending_save = []
    # We iterate over .contributor_set instead of .contributors in order
    # to take advantage of .include('contributor__user')
    for contrib in node.contributor_set.all():
        user = contrib.user
        if user.comments_viewed_timestamp.get(current_wiki_guid, None):
            timestamp = user.comments_viewed_timestamp[current_wiki_guid]
            user.comments_viewed_timestamp[desired_wiki_object._id] = timestamp
            del user.comments_viewed_timestamp[current_wiki_guid]
            users_pending_save.append(user)
    if users_pending_save:
        bulk_update(users_pending_save, update_fields=['comments_viewed_timestamp'])
    return users_pending_save
def delete_trippee_medical_info(self):
    """
    Delete all medical info saved on ``IncomingStudents`` and ``Registrations``.
    """
    incoming = IncomingStudent.objects.filter(trips_year=self.curr_year)
    for inc in incoming:
        inc.med_info = ''

    registrations = Registration.objects.filter(trips_year=self.curr_year)
    for reg in registrations:
        reg.clear_medical_info()

    bulk_update(incoming, batch_size=SQLITE_BATCH_LIMIT)
    bulk_update(registrations, batch_size=SQLITE_BATCH_LIMIT)
def add_locale_data(apps, schema_editor):
    Locale = apps.get_model('base', 'Locale')
    locales = Locale.objects.all()

    for l in locales:
        try:
            data = LOCALES[l.code]
            l.script = data.get('script')
            l.direction = data.get('direction', 'ltr')
            l.population = int(round(data.get('population') / 1000)) * 1000
        except KeyError:
            pass

    if locales:
        bulk_update(locales)
def import_from_lang_detection_results(dataset_id, detection_results_filename):
    dataset = Dataset.objects.get(id=dataset_id)
    #messages = dataset.message_set.all()#.exclude(time__isnull=True)
    messages = dataset.message_set.exclude(participant__id=2)
    messages = messages.filter(type=0)
    messages_with_lang = read_lang_detection_results(detection_results_filename)

    current_bulk = []
    for msg in messages_with_lang:
        msg_obj = messages.get(id=msg['id'])
        msg_obj.detected_language = msg['lang'].title()
        current_bulk.append(msg_obj)

    bulk_update(current_bulk)
def migrate_locales(apps, schema_editor):
    """
    Add alternative locale codes for our integrations with Microsoft products.
    """
    Locale = apps.get_model('base', 'Locale')
    locale_map = {l.code: l for l in Locale.objects.all()}

    for pontoon_code, ms_code in filter(lambda x: x[1], TRANSLATOR_LOCALES.items()):
        if pontoon_code in locale_map:
            locale_map[pontoon_code].ms_translator_code = ms_code

    for pontoon_code, ms_code in filter(lambda x: x[1], TERMINOLOGY_LOCALES.items()):
        if pontoon_code in locale_map:
            locale_map[pontoon_code].ms_terminology_code = ms_code

    bulk_update(locale_map.values(),
                update_fields=['ms_translator_code', 'ms_terminology_code'])
def execute(self):
    """
    Execute the changes stored in this changeset. Execute can only be
    called once per changeset; subsequent calls raise a RuntimeError, even
    if the changes failed.
    """
    if self.executed:
        raise RuntimeError('execute() can only be called once per changeset.')
    else:
        self.executed = True

    # Store locales and resources for FK relationships.
    self.locales = {l.code: l for l in Locale.objects.all()}
    self.resources = {r.path: r for r in self.db_project.resource_set.all()}

    # Perform the changes and fill the lists for bulk creation and
    # updating.
    self.execute_update_vcs()
    self.execute_create_db()
    self.execute_update_db()
    self.execute_obsolete_db()

    # Apply the built-up changes to the DB
    if len(self.entities_to_update) > 0:
        bulk_update(self.entities_to_update, update_fields=[
            'resource', 'string', 'string_plural', 'key', 'comment',
            'order', 'source'
        ])

    Translation.objects.bulk_create(self.translations_to_create)
    if len(self.translations_to_update) > 0:
        bulk_update(self.translations_to_update, update_fields=[
            'entity', 'locale', 'string', 'plural_form', 'approved',
            'approved_user_id', 'approved_date', 'fuzzy', 'extra'
        ])
def clean_abs_file_paths(apps, schema_editor):
    """Replace wrong absolute store file paths with proper relative paths
    derived from store.pootle_path values.
    """
    store_model = apps.get_model("pootle_store.Store")
    store_qs = store_model.objects.select_related("filetype")
    for project in apps.get_model("pootle_project.Project").objects.all():
        if not project.treestyle == 'nongnu':
            continue
        stores = list(store_qs.filter(
            translation_project__project_id=project.id,
            file__startswith="/"))
        for store in stores:
            lang, prj, d, fn = split_pootle_path(store.pootle_path)
            store.file = os.path.join(prj, lang, d, fn)
        bulk_update(stores, update_fields=["file"])
def execute(self):
    """
    Execute the changes stored in this changeset. Execute can only be
    called once per changeset; subsequent calls raise a RuntimeError, even
    if the changes failed.
    """
    if self.executed:
        raise RuntimeError("execute() can only be called once per changeset.")
    else:
        self.executed = True

    # Store locales and resources for FK relationships.
    self.locales = {l.code: l for l in Locale.objects.all()}
    self.resources = {r.path: r for r in self.db_project.resource_set.all()}

    # Perform the changes and fill the lists for bulk creation and
    # updating.
    self.execute_update_vcs()
    self.execute_create_db()
    self.execute_update_db()
    self.execute_obsolete_db()

    # Apply the built-up changes to the DB
    if len(self.entities_to_update) > 0:
        bulk_update(
            self.entities_to_update,
            update_fields=["resource", "string", "string_plural", "key",
                           "comment", "order", "source"],
        )

    Translation.objects.bulk_create(self.translations_to_create)
    if len(self.translations_to_update) > 0:
        bulk_update(
            self.translations_to_update,
            update_fields=[
                "entity", "locale", "string", "plural_form", "approved",
                "approved_user", "approved_date", "fuzzy", "extra",
            ],
        )