def download(args):
    """
    Download all AudioSet examples labelled with the given classes.

    :param args: argparse namespace with a `classes` attribute
    :return:
    """
    print("Downloading classes from AudioSet.")
    for class_name in args.classes:
        utils.download(class_name, args)
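# Hedged usage sketch for download(): the function only needs an object exposing a
# `classes` attribute, e.g. an argparse namespace. The flag name below and the `utils`
# module providing the per-class download helper are assumptions, not part of the
# snippet above.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Download AudioSet examples by label.")
    parser.add_argument("--classes", nargs="+", required=True,
                        help="AudioSet class labels to fetch, e.g. Speech Laughter")
    download(parser.parse_args())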
def download_aug(path, overwrite=False):
    _AUG_DOWNLOAD_URLS = [(
        "http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz",
        "7129e0a480c2d6afb02b517bb18ac54283bfaa35",
    )]
    makedirs(path)
    for url, checksum in _AUG_DOWNLOAD_URLS:
        filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with tarfile.open(filename) as tar:
            tar.extractall(path=path)
        shutil.move(
            os.path.join(path, "benchmark_RELEASE"),
            os.path.join(path, "VOCaug"),
        )
        filenames = ["VOCaug/dataset/train.txt", "VOCaug/dataset/val.txt"]
        # generate trainval.txt
        with open(os.path.join(path, "VOCaug/dataset/trainval.txt"), "w") as outfile:
            for fname in filenames:
                fname = os.path.join(path, fname)
                with open(fname) as infile:
                    for line in infile:
                        outfile.write(line)
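# These dataset helpers (download_aug above and the download_coco/download_sbu/
# download_voc/download_ade variants that follow) assume roughly this preamble. The
# gluoncv import path for `download` and `makedirs` is a guess based on similar
# dataset-preparation scripts and may differ in the original project.
import os
import shutil
import tarfile
import zipfile

from gluoncv.utils import download, makedirs  # hypothetical source of the helpers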
def download_coco(path, overwrite=False):
    _DOWNLOAD_URLS = [
        ("http://images.cocodataset.org/zips/train2017.zip",
         "10ad623668ab00c62c096f0ed636d6aff41faca5"),
        ("http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
         "8551ee4bb5860311e79dace7e79cb91e432e78b3"),
        ("http://images.cocodataset.org/zips/val2017.zip",
         "4950dc9d00dbe1c933ee0170f5797584351d2a41"),
        # ('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
        #  '46cdcf715b6b4f67e980b529534e79c2edffe084'),
        # test2017.zip, for those who want to attend the competition.
        # ('http://images.cocodataset.org/zips/test2017.zip',
        #  '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for url, checksum in _DOWNLOAD_URLS:
        filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with zipfile.ZipFile(filename) as zf:
            zf.extractall(path=path)
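# A direct call is enough for download_coco(); note that train2017, val2017 and the
# annotations add up to close to 20 GB of archives, so `path` should point at a volume
# with enough free space. The path below is an arbitrary example.
if __name__ == "__main__":
    download_coco("./mscoco", overwrite=False)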
def handle(self, *args, **kwargs):
    _, _, content = download(expand_gdrive_download_url(kwargs["infile"]))
    current_flags_count = Flag.objects.count()
    flags = []

    if content is None:
        self.stderr.write("Cannot download file using url {}".format(
            kwargs["infile"]))
        return

    scores = defaultdict(float)

    for l in content.splitlines():
        d = json.loads(l)
        try:
            pep = Person.objects.get(pk=int(d.get("id_PEP", 0)))
            rule = Rule.objects.get(pk=d.get("Rule"))
        except Person.DoesNotExist:
            self.stderr.write("PEP with id '{}' doesn't exist".format(
                d.get("id_PEP")))
            continue
        except Rule.DoesNotExist:
            self.stderr.write("Rule with id '{}' doesn't exist".format(
                d.get("Rule")))
            continue

        scores[pep.pk] += rule.weight
        flags.append(
            Flag(person=pep, rule=rule, data=d.get("flag_data", {})))

    if len(flags) < current_flags_count * 0.9 and not kwargs["force"]:
        self.stderr.write(
            "Major decrease in number of flags (was {}, now {}), aborting".format(
                current_flags_count, len(flags)))
        return

    Flag.objects.all().delete()
    Flag.objects.bulk_create(flags)

    max_score = max(scores.values())
    for pep_id, score in scores.items():
        if score == max_score:
            self.stdout.write("Max score is {} (pep {})".format(score, pep_id))
            break

    if max_score < 1. and not kwargs["force"]:
        self.stderr.write("Max score is {} which is too low".format(max_score))
        return

    Rule.objects.update(scale=10 / max_score)

    self.stdout.write("Import is complete, {} new flags added".format(len(flags)))
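# The handler above reads kwargs["infile"] and kwargs["force"]. A plausible (assumed)
# add_arguments() for the same management command could look like this; it would sit
# next to handle() on the Command subclass, and the help texts are guesses.
def add_arguments(self, parser):
    parser.add_argument(
        "infile",
        help="Google Drive URL of the JSONL file with flag records")
    parser.add_argument(
        "--force", action="store_true", default=False,
        help="Apply the import even when the sanity checks fail")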
def download_sbu(path, overwrite=False):
    _DOWNLOAD_URLS = [
        'http://www3.cs.stonybrook.edu/~cvl/content/datasets/shadow_db/SBU-shadow.zip',
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url in _DOWNLOAD_URLS:
        # Note: no sha1 checksum is provided for this archive, and the file is saved
        # directly under `path` even though a `downloads` directory is created above.
        filename = download(url, path=path, overwrite=overwrite)
        # extract
        with zipfile.ZipFile(filename, "r") as zf:
            zf.extractall(path=path)
        print("Extracted", filename)
def download_voc(path, overwrite=False):
    _DOWNLOAD_URLS = [
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
         '34ed68851bce2a36e2a223fa52c661d592c66b3c'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
         '41a8d6e12baa5ab18ee7f8f8029b9e11805b4ef1'),
        ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
         '4e443f8a2eca6b1dac8a6c57641b67dd40621a49'),
    ]
    makedirs(path)
    for url, checksum in _DOWNLOAD_URLS:
        filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
        # extract
        with tarfile.open(filename) as tar:
            tar.extractall(path=path)
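# Possible driver tying the two Pascal VOC helpers together: download_voc() fetches the
# VOC 2007/2012 archives, then download_aug() (defined earlier) adds the semantic
# contours benchmark as the "VOCaug" directory used for augmented segmentation
# training. The root path is only an example.
if __name__ == "__main__":
    import os

    root = os.path.expanduser("~/.mxnet/datasets/voc")
    download_voc(root, overwrite=False)
    download_aug(root, overwrite=False)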
def download_ade(path, overwrite=False):
    _AUG_DOWNLOAD_URLS = [
        ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip',
         '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
        ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip',
         'e05747892219d10e9243933371a497e905a4860c'),
    ]
    download_dir = os.path.join(path, 'downloads')
    makedirs(download_dir)
    for url, checksum in _AUG_DOWNLOAD_URLS:
        filename = download(url, path=download_dir, overwrite=overwrite,
                            sha1_hash=checksum)
        # extract
        with zipfile.ZipFile(filename, "r") as zip_ref:
            zip_ref.extractall(path=path)
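# Example invocation of download_ade(); the archives are stored in <path>/downloads
# while their contents are extracted directly into <path>. The path is illustrative.
if __name__ == "__main__":
    import os

    download_ade(os.path.expanduser("~/.mxnet/datasets/ade"), overwrite=False)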
def handle(self, *args, **options):
    peklun = User.objects.get(username="******")

    wks = get_spreadsheet().sheet1
    for i, l in enumerate(wks.get_all_records()):
        # reopen the sheet from time to time to avoid disconnects by timeout
        if i % 2000 == 0 and i:
            wks = get_spreadsheet().sheet1

        self.stdout.write('Processing line #{}'.format(i))

        company_ipn = l.get("ІПН", "")
        company_name = l.get("Назва", "")
        person_id = l.get("id персони", "")
        company_id = l.get("id компанії", "")
        photo_url = l.get("Фото", "")
        person = None

        # First let's search for an appropriate company
        company = self.process_company(company_id, company_ipn, company_name)

        # No company found, skip this row
        if company is None:
            continue

        # Write the company id back to the spreadsheet for further use
        if company.pk != company_id:
            company_id = company.pk
            wks.update_cell(i + 2, len(l.keys()), company.pk)

        person_name = l.get("ПІБ", "").strip()
        position = l.get("Посада", "").strip()
        person_dob = unicode(l.get("Дата народження", "")).strip()
        person_from = parse_date(l.get("Дата призначення", ""))
        person_to = parse_date(l.get("Дата звільнення", ""))
        doc_received = parse_date(l.get("Дата відповіді", ""))
        docs = l.get("Лінк на відповідь", "").strip()
        website = l.get("лінк на сайт", "").strip()

        # Now let's search for the person
        if person_name:
            last_name, first_name, patronymic, _ = parse_fullname(person_name)
            if not last_name:
                continue

            # First we search by person_id (if it's present)
            if person_id:
                try:
                    person = Person.objects.get(pk=person_id)
                except Person.DoesNotExist:
                    pass

            # If nothing is found, we search by name (for now)
            if not person:
                try:
                    person = Person.objects.get(
                        first_name_uk__iexact=first_name,
                        last_name_uk__iexact=last_name,
                        patronymic_uk__iexact=patronymic)
                except Person.MultipleObjectsReturned:
                    self.stderr.write("Double person {}!".format(person_name))
                except Person.DoesNotExist:
                    pass

            # If nothing is found, let's create a record for that person
            if not person:
                person = Person()
                self.stderr.write("Created new person {}".format(person_name))

            person.first_name_uk = first_name
            person.last_name_uk = last_name
            person.patronymic_uk = patronymic

            Ua2RuDictionary.objects.get_or_create(term=first_name)
            Ua2RuDictionary.objects.get_or_create(term=last_name)
            Ua2RuDictionary.objects.get_or_create(term=patronymic)

            person.first_name_en = translitua(first_name)
            person.last_name_en = translitua(last_name)
            person.patronymic_en = translitua(patronymic)

            person.is_pep = True
            person.imported = True
            person.type_of_official = 1

            # Parsing the date (can be a full date, just a year, or year/month)
            if person_dob:
                person.dob = parse_date(person_dob)
                if len(person_dob) == 4:
                    person.dob_details = 2  # Only year
                if len(person_dob) > 4 and len(person_dob) < 7:
                    person.dob_details = 1  # month and year

            # Let's download the photo (if any)
            if not person.photo and photo_url:
                photo_name, photo_san_name, photo_content = download(
                    photo_url, translitua(person_name))
                if photo_name:
                    person.photo.save(photo_san_name, ContentFile(photo_content))
                else:
                    self.stdout.write("Cannot download image %s for %s" %
                                      (photo_url, person_name))

            person.save()

            # Write the person id back to the table
            if person.pk != person_id:
                person_id = person.pk
                wks.update_cell(i + 2, len(l.keys()) - 1, person.pk)

            # Now let's download all supporting docs
            docs_downloaded = []
            first_doc_name = False

            # There might be many of them
            for doc in docs.split(", "):
                doc_instance = None

                # we cannot download folders from google docs, so let's skip them
                if doc and "folderview" not in doc \
                        and "drive/#folders" not in doc:
                    doc = expand_gdrive_download_url(doc)
                    doc_hash = sha1(doc).hexdigest()

                    # Check if the document was already imported
                    try:
                        doc_instance = Document.objects.get(hash=doc_hash)
                    except Document.DoesNotExist:
                        self.stdout.write('Downloading file {}'.format(doc))
                        doc_name, doc_san_name, doc_content = download(doc)
                        doc_san_name = translitua(doc_san_name)

                        if doc_name:
                            doc_instance = Document(name_uk=doc_name,
                                                    uploader=peklun,
                                                    hash=doc_hash)
                            doc_instance.doc.save(doc_san_name,
                                                  ContentFile(doc_content))
                            doc_instance.save()
                        else:
                            self.stdout.write('Cannot download file {}'.format(doc))

                if doc_instance:
                    first_doc_name = doc_instance.name_uk
                    docs_downloaded.append(doc_instance.doc.url)

            # Now let's set up links between the person and the company
            links = Person2Company.objects.filter(
                (Q(date_established=person_from) |
                 Q(date_established=mangle_date(person_from)) |
                 Q(date_established__isnull=True)),
                (Q(date_finished=person_to) |
                 Q(date_finished=mangle_date(person_to)) |
                 Q(date_finished__isnull=True)),
                from_person=person,
                to_company=company)

            # Delete duplicated links, including those cases when dates were
            # imported incorrectly because of parse_date
            if len(links) > 1:
                links.delete()

            link, _ = Person2Company.objects.update_or_create(
                from_person=person,
                to_company=company,
                date_established=person_from,
                date_established_details=0,
                date_finished=person_to,
                date_finished_details=0)

            if not link.relationship_type:
                link.relationship_type = position

            # And add the position to the translation dictionary
            Ua2EnDictionary.objects.get_or_create(term=lookup_term(position))

            # oh, and add links to supporting docs
            all_docs = docs_downloaded + website.split(", ")
            if all_docs:
                link.proof = ", ".join(filter(None, all_docs))
            if first_doc_name:
                link.proof_title = first_doc_name

            link.date_confirmed = doc_received
            link.is_employee = True
            link.save()
def load_peps(apps, schema_editor):
    User = apps.get_model("auth", "User")
    Company = apps.get_model("core", "Company")
    Person = apps.get_model("core", "Person")
    Person2Company = apps.get_model("core", "Person2Company")
    Document = apps.get_model("core", "Document")

    peklun = User.objects.get(username="******")

    # errors="ignore" belongs to open(), not to DictReader
    with open("core/dicts/new_peps.csv", "r", errors="ignore") as fp:
        r = DictReader(fp)
        for i, l in enumerate(r):
            print(i)

            company_ipn = l.get("ІПН", "")
            company_name = l.get("Назва", "")
            company = None

            if not company_ipn and not company_name:
                continue

            # Search by IPN first (if it's present)
            if company_ipn:
                try:
                    company = Company.objects.get(edrpou=company_ipn)
                except Company.DoesNotExist:
                    pass

            # then search by name (if it's present)
            if company_name:
                if company is None:
                    try:
                        company = Company.objects.get(name=company_name)
                    except Company.DoesNotExist:
                        pass

            if company is None:
                company = Company(state_company=True)

            # Set missing params
            if not company.name:
                company.name = company_name
            if not company.edrpou:
                company.edrpou = company_ipn

            company.save()

            person_name = l.get("ПІБ", "").strip()
            position = l.get("Посада", "").strip()
            person_dob = l.get("Дата народження", "").strip()
            person_from = l.get("Дата призначення", "").strip()
            person_to = l.get("Дата звільнення", "").strip()
            doc_received = l.get("Дата відповіді", "").strip()
            doc = l.get("Лінк на відповідь", "").strip()
            website = l.get("лінк на сайт", "").strip()

            if person_name:
                chunks = person_name.split(" ")
                if len(chunks) == 2:
                    last_name = title(chunks[0])
                    first_name = title(chunks[1])
                    patronymic = ""  # two-word names carry no patronymic
                else:
                    last_name = title(" ".join(chunks[:-2]))
                    first_name = title(chunks[-2])
                    patronymic = title(chunks[-1])

                # Kind of get_or_create
                try:
                    person = Person.objects.get(
                        first_name__iexact=first_name,
                        last_name__iexact=last_name,
                        patronymic__iexact=patronymic)
                except Person.DoesNotExist:
                    person = Person(first_name=first_name,
                                    last_name=last_name,
                                    patronymic=patronymic)

                person.is_pep = True
                person.type_of_official = 1

                if person_dob:
                    person.dob = parse_date(person_dob)
                    if len(person_dob) == 4:
                        person.dob_details = 2  # Only year
                    if len(person_dob) > 4 and len(person_dob) < 7:
                        person.dob_details = 1  # month and year

                person.save()

                doc_instance = None
                if doc and "folderview" not in doc \
                        and "drive/#folders" not in doc:
                    print(doc)
                    doc = expand_gdrive_download_url(doc)
                    doc_hash = sha1(doc).hexdigest()

                    try:
                        doc_instance = Document.objects.get(hash=doc_hash)
                    except Document.DoesNotExist:
                        doc_name, doc_san_name, doc_content = download(doc)
                        doc_san_name = translitua(doc_san_name)

                        if doc_name:
                            doc_instance = Document(name=doc_name,
                                                    uploader=peklun,
                                                    hash=doc_hash)
                            doc_instance.doc.save(doc_san_name,
                                                  ContentFile(doc_content))
                            doc_instance.save()

                link, link_created = Person2Company.objects.update_or_create(
                    from_person=person,
                    to_company=company,
                    date_established=parse_date(person_from),
                    date_finished=parse_date(person_to))

                if not link.relationship_type:
                    link.relationship_type = position

                if doc_instance is not None:
                    link.proof_title = doc_instance.name
                    link.proof = doc_instance.doc.url

                link.date_confirmed = parse_date(doc_received)

                if not doc and website:
                    link.proof = website

                link.save()
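# In a Django data migration, load_peps() is typically wired up via RunPython; the
# dependency below is a placeholder and `from django.db import migrations` is assumed
# at the top of the migration module.
class Migration(migrations.Migration):
    dependencies = [
        ("core", "0001_initial"),  # hypothetical previous migration
    ]

    operations = [
        migrations.RunPython(load_peps, migrations.RunPython.noop),
    ]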