Example #1
def export_aliases(project, fh):
    """ Dump a list of all entity names to a CSV file. The table will contain the 
    active name of each entity, and one of the other existing names as an alias. """

    writer = DictWriter(fh, ['entity_id', 'alias', 'canonical'])
    writer.writeheader()

    alias = aliased(EntityProperty)
    canonical = aliased(EntityProperty)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(canonical)
    q = q.filter(Entity.project_id == project.id)
    q = q.filter(alias.entity_id != None)  # noqa: SQLAlchemy needs != None here
    q = q.filter(alias.name == 'name')
    q = q.filter(canonical.name == 'name')
    q = q.filter(canonical.active == True)  # noqa
    q = q.add_columns(canonical.value_string.label('canonical'))
    for row in q.all():
        #if row.alias == row.canonical:
        #    continue
        writer.writerow({
            'entity_id': str(row.entity_id),
            'alias': row.alias,
            'canonical': row.canonical
        })
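
Example #1 assumes a SQLAlchemy session and models (Entity, EntityProperty) that are not shown. Stripped of the query, the DictWriter pattern it demonstrates reduces to this self-contained Python 3 sketch with made-up rows:

import csv
import io

# Hypothetical rows; the real example builds these from a SQLAlchemy query.
rows = [
    {'entity_id': '1', 'alias': 'ACME Ltd.', 'canonical': 'ACME Limited'},
    {'entity_id': '2', 'alias': 'Foo Corp', 'canonical': 'Foo Corporation'},
]

fh = io.StringIO()
writer = csv.DictWriter(fh, ['entity_id', 'alias', 'canonical'])
writer.writeheader()
for row in rows:
    writer.writerow(row)
print(fh.getvalue())
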
Example #2
def export_aliases(project, fh):
    """ Dump a list of all entity names to a CSV file. The table will contain
    the active name of each entity, and one of the other existing names as an
    alias. """

    writer = DictWriter(fh, ['entity_id', 'schema', 'alias', 'canonical'])
    writer.writeheader()

    alias = aliased(Property)
    canonical = aliased(Property)
    schema = aliased(Schema)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(schema)
    q = q.join(canonical)
    q = q.filter(Entity.project_id == project.id)
    q = q.filter(alias.entity_id != None)  # noqa
    q = q.filter(alias.name == 'name')
    q = q.filter(canonical.name == 'name')
    q = q.filter(canonical.active == True)  # noqa
    q = q.add_columns(canonical.value_string.label('canonical'))
    q = q.add_columns(schema.name.label('schema'))
    for row in q.all():
        writer.writerow({
            'entity_id': str(row.entity_id),
            'schema': row.schema,
            'alias': row.alias,
            'canonical': row.canonical
        })
Example #3
def studentcsv():
    f = StringIO()
    writer = DictWriter(f, STUDENT_FIELDS)
    # header row via the pre-2.7 idiom (see the note after this example)
    writer.writerow(dict((x, x) for x in STUDENT_FIELDS))
    for student in models.Student.query.all():
        if student.project_id:
            record = None  # stays None if building the row fails below
            try:
                project = student.project
                record = {STUDENT_FIELDS[0]: student.project_id,
                          STUDENT_FIELDS[1]: student.project.category.name,
                          STUDENT_FIELDS[2]: fx(student.firstname),
                          STUDENT_FIELDS[3]: fx(student.lastname),
                          STUDENT_FIELDS[4]: student.grade,
                          STUDENT_FIELDS[5]: student.gender,
                          STUDENT_FIELDS[6]: student.school.name,
                          STUDENT_FIELDS[7]: str(project.individual),
                          STUDENT_FIELDS[8]: str(project.forms.first().vafa or
                                            project.forms.first().vafb),
                          STUDENT_FIELDS[9]: str(project.forms.first().hsf),
                          STUDENT_FIELDS[10]: str(project.forms.first().phbaf)}
            except AttributeError as error:
                app.logger.error('ProjID:%s - ID:%s - %s %s\n%s\n%s' %
                        (student.project_id, student.id, student.firstname,
                         student.lastname, vars(student.project),
                         error))
            if record:
                try:
                    writer.writerow(record)
                except UnicodeEncodeError:
                    app.logger.error("Unicode Error:\n%s"%record)

    return f.getvalue()
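
This example (and Examples #5 and #12 below) emits the header by writing a dict that maps each field name to itself — the idiom from before DictWriter.writeheader() was added in Python 2.7. A quick sketch of the equivalence, with illustrative field names:

from csv import DictWriter
from io import StringIO

fields = ['id', 'name']

old, new = StringIO(), StringIO()
DictWriter(old, fields).writerow(dict((f, f) for f in fields))  # pre-2.7 idiom
DictWriter(new, fields).writeheader()                           # modern spelling
assert old.getvalue() == new.getvalue()
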
Example #4
def find_all_matching_officers(min_percentage=0.75):
    notable_officers = set()
    officer_fingerprints = get_all_officer_fingerprints()
    sys.stderr.write("\nFinding matches...\n")
    writer = DictWriter(sys.stdout, [
        'Full Name (from persons)',
        'officer_id (from npo_officers)'
    ])
    writer.writeheader()

    for i, data in enumerate(gdocs_persons()):
        fingerprint = make_fingerprint(data['Full Name'])
        matching_ids = find_matching_officers(
            fingerprint,
            officer_fingerprints,
            excluded_ids=notable_officers,
            min_percentage=min_percentage
        )

        for officer_id in matching_ids:
            writer.writerow({
                'Full Name (from persons)': data['Full Name'],
                'officer_id (from npo_officers)': officer_id,
            })

        notable_officers.update(matching_ids)
        sys.stderr.write("\r%d" % (i + 1))
        sys.stderr.flush()

    sys.stderr.write("\nDone\n")
Example #5
def render_non_html(encoding, querySet):
    '''Renders non-html formats and returns an appropriate HttpResponse'''

    if encoding == 'csv':
        vals = querySet.values()
        response = HttpResponse(mimetype='text/csv')
        response['Content-Disposition'] = \
                'attachment; filename=genesets%d.csv' % len(vals)
        csvW = DictWriter(response, GenesetFieldNames)
        fieldsDict = {}
        for k in GenesetFieldNames:
            fieldsDict[k] = k
        csvW.writerow(fieldsDict)
        csvW.writerows(vals)
    elif encoding == 'xml':
        response = HttpResponse(mimetype='text/xml')
        response['Content-Disposition'] = \
                'attachment; filename=genesets%d.xml' % len(querySet)
        serializers.serialize("xml", querySet, stream=response)
    elif encoding == "json":
        response = HttpResponse(mimetype='application/json')
        response['Content-Disposition'] = \
                'attachment; filename=genesets%d.js' % len(querySet)
        serializers.serialize("json", querySet, stream=response)

    return response
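
DictWriter only needs an object with a write() method, which is why the Django HttpResponse can be handed to it directly. Note that the mimetype argument used above was removed in Django 1.7 in favour of content_type; a sketch of the same idea for current Django, with a hypothetical helper name and field list:

from csv import DictWriter
from django.http import HttpResponse

def csv_response(rows, fieldnames, filename='export.csv'):
    # HttpResponse is file-like, so DictWriter writes into it directly.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename=%s' % filename
    writer = DictWriter(response, fieldnames)
    writer.writeheader()
    writer.writerows(rows)
    return response
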
Example #6
    def handle(self, *args, **options):
        if len(args) != 2:
            raise CommandError(
                'usage: python manage.py dump_results '
                '<template_file_path> '
                '<results_csv_file_path>'
            )

        # Get paths from args, and normalize them to absolute paths:
        template_file_path, results_csv_file_path = map(os.path.abspath, args)

        try:
            template = HitTemplate.objects.get(name=template_file_path)
        except ObjectDoesNotExist:
            sys.exit('There is no matching <template_file_path>.')

        completed_hits = template.hit_set.filter(completed=True)
        if not completed_hits.exists():
            sys.exit('There are no completed HITs.')

        fieldnames, rows = results_data(completed_hits)
        with open(results_csv_file_path, 'wb') as fh:
            writer = DictWriter(fh, fieldnames)
            writer.writeheader()
            for row in rows:
                writer.writerow(row)
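
Opening the output with 'wb' is the Python 2 convention for the csv module; under Python 3 the same code fails because csv expects text. A sketch of the writing step as it would look on Python 3:

from csv import DictWriter

def dump_results_py3(path, fieldnames, rows):
    # newline='' stops the csv module from doubling line endings on Windows
    with open(path, 'w', newline='', encoding='utf-8') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()
        writer.writerows(rows)
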
Example #7
def find_all_matching_officers(min_percentage=0.75):
    notable_officers = set()
    officer_fingerprints = get_all_officer_fingerprints()
    sys.stderr.write("\nFinding matches...\n")
    writer = DictWriter(
        sys.stdout,
        ['Full Name (from persons)', 'officer_id (from npo_officers)'])
    writer.writeheader()

    for i, data in enumerate(gdocs_persons()):
        fingerprint = make_fingerprint(data['Full Name'])
        matching_ids = find_matching_officers(fingerprint,
                                              officer_fingerprints,
                                              excluded_ids=notable_officers,
                                              min_percentage=min_percentage)

        for officer_id in matching_ids:
            writer.writerow({
                'Full Name (from persons)': data['Full Name'],
                'officer_id (from npo_officers)': officer_id,
            })

        notable_officers.update(matching_ids)
        sys.stderr.write("\r%d" % (i + 1))
        sys.stderr.flush()

    sys.stderr.write("\nDone\n")
Example #8
def mappings_export(file, decided):
    """Export mappings to a CSV file."""
    writer = DictWriter(file, fieldnames=['left', 'right', 'judgement'])
    writer.writeheader()
    for mapping in Mapping.find_by_decision(decided):
        writer.writerow({
            'left': mapping.left_uid,
            'right': mapping.right_uid,
            'judgement': mapping.judgement
        })
Example #9
    def write_csv(self, outputdir, timestamp, items=None):
        path = os.path.join(
            outputdir, self.filename('csv', timestamp, **self.filter_kwargs))

        if items is None:
            items = self.get_items()

        with open(path, 'w') as csvfile:
            writer = DictWriter(csvfile, self.get_fields())
            writer.writeheader()
            for row in items:
                writer.writerow(row)

        return self
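
One caveat with this pattern: if a row from get_items() carries a key that is not in get_fields(), writerow raises ValueError, and missing keys are substituted with restval. Both knobs in a minimal sketch:

from csv import DictWriter
from io import StringIO

out = StringIO()
writer = DictWriter(out, ['a', 'b'], restval='', extrasaction='ignore')
writer.writeheader()
writer.writerow({'a': 1})            # 'b' filled with restval ''
writer.writerow({'a': 2, 'c': 99})   # unknown 'c' silently dropped
print(out.getvalue())
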
Example #10
    def write_csv(self, outputdir, timestamp, items=None):
        path = os.path.join(outputdir,
            self.filename('csv', timestamp, **self.filter_kwargs))

        if items is None:
            items = self.get_items()

        with open(path, 'w') as csvfile:
            writer = DictWriter(csvfile, self.get_fields())
            writer.writeheader()
            for row in items:
                writer.writerow(row)

        return self
Example #11
def export_csv_table(archive, model, name):
    file_path = os.path.join(_make_export_path(), '%s.csv' % name)
    log.info("Exporting CSV to %s...", file_path)
    writer = None
    with open(file_path, 'w') as fh:
        for obj in session.query(model):
            row = obj.to_row()
            if writer is None:
                writer = DictWriter(fh, row.keys())
                writer.writeheader()
            writer.writerow(row)

    url = archive.upload_file(file_path, mime_type='text/csv')
    if url is not None:
        os.unlink(file_path)
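
Deferring writer construction until the first row, as Example #11 does, lets the column list come from the data itself; the trade-off is that every later row must stick to the first row's keys. The pattern in isolation, with plain dicts standing in for the ORM objects:

from csv import DictWriter
from io import StringIO

rows = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]

fh = StringIO()
writer = None
for row in rows:
    if writer is None:
        writer = DictWriter(fh, list(row.keys()))
        writer.writeheader()
    writer.writerow(row)
print(fh.getvalue())
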
Example #12
def tocsv():
    f = StringIO()
    writer = DictWriter(f, CSV_FIELDS)
    writer.writerow(dict((x, x) for x in CSV_FIELDS))  # header row (pre-2.7 idiom)
    districts = models.District.query.order_by('name').all()
    for district in districts:
        schools = district.schools.order_by('name').all()
        for school in schools:
            students = school.students.join(models.Project).order_by('title')
            students = students.filter(models.Student.team_leader==True).all()
            for student in students:
                record = None  # stays None if building the row fails below
                try:
                    record = {CSV_FIELDS[0]: student.project.id,
                              CSV_FIELDS[1]: fx("%s %s"%(student.firstname,
                                                    student.lastname)),
                              CSV_FIELDS[4]: fx(student.project.title),
                              CSV_FIELDS[5]: fx(student.project.category.name),
                              CSV_FIELDS[6]: fx(student.project.division),
                              CSV_FIELDS[7]: student.school.name,
                              CSV_FIELDS[8]: student.school.district.name,
                              CSV_FIELDS[9]: fx("%s %s"%(student.sponsor.firstname,
                                                    student.sponsor.lastname)),
                              CSV_FIELDS[10]: student.project.forms_submitted,
                              CSV_FIELDS[11]: student.project.notes,
                              }
                    team = student.project.student
                    team = team.filter(models.Student.team_leader==False).limit(2)
                    team = team.all()
                    i = 2
                    for member in team:  # don't shadow the outer 'student'
                        record[CSV_FIELDS[i]] = fx("%s %s" % (member.firstname,
                                                              member.lastname))
                        i += 1
                except AttributeError as error:
                    app.logger.error('ProjID:%s - ID:%s - %s %s\n%s\n%s' %
                            (student.project_id, student.id, student.firstname,
                             student.lastname, vars(student.project),
                             error))


                if record:
                    try:
                        writer.writerow(record)
                    except UnicodeEncodeError:
                        app.logger.error("Unicode Error:\n%s" % record)

    return f.getvalue()
Example #13
def convert_file(file_path):
    result_path = file_path.replace('/wdvc16', '/processed_wdvc16').replace('.xml', '.csv')
    print 'writing to %s...' % result_path

    xml_pages = stream_pages(file_path)

    fieldnames = [u'revision_id', u'revisions_in_group', u'revision_comment', u'revision_timestamp',
                  u'page_id', u'page_group', u'page_ns', u'page_title', 
                  u'anonimous_ip', u'user_id', u'username']

    with open(result_path, 'w') as csv_file:
        writer = DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for xml_page in tqdm(xml_pages):
            pages = parse_page(xml_page)
            for page in pages:
                writer.writerow(page)
Example #14
    def report(self):
        trello_members, google_members, board_members = (
            self.auditor.get_members())
        all_members = set()
        all_members.update(trello_members)
        all_members.update(google_members)
        for members in board_members.values():
            all_members.update(members)

        with open(self.filename, 'wb+') as fp:
            csv = DictWriter(fp, ['name'] + list(all_members))
            csv.writeheader()
            board_members['google'] = google_members
            board_members['trello'] = trello_members

            for board, members in board_members.items():
                row = {member: (member in members) for member in all_members}
                row['name'] = board
                csv.writerow(row)
Example #15
def writetocsv(xmlfile=None, month=None, year=None, outfile='../data/data.csv'):
    if xmlfile is None:
        raise Exception("No XML file passed")
    if month is None:
        raise Exception("No month passed")
    if year is None:
        raise Exception("No year passed")
    xmldata = etree.parse(xmlfile)
    csvwriter = None
    csvfile = open(outfile, 'a')  # honor the outfile argument, not a hard-coded path
    for incident in xmldata.iter('DATA'):
        data = {'month': month, 'year': year}
        for field in incident.iterchildren():
            data[field.tag] = field.text
        if not csvwriter:
            csvwriter = DictWriter(csvfile, fieldnames=data.keys())
            csvwriter.writeheader()
        csvwriter.writerow(data)
    csvfile.close()
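
Taking fieldnames from the first incident's data.keys() works only while every DATA element carries the same child tags; an incident with an extra tag would make writerow raise ValueError. Collecting the union of keys first is the safer variant, sketched here over plain dicts:

from csv import DictWriter
from io import StringIO

incidents = [{'month': 1, 'OFFENSE': 'x'}, {'month': 2, 'BLOCK': 'y'}]

fieldnames = sorted(set().union(*(d.keys() for d in incidents)))
fh = StringIO()
writer = DictWriter(fh, fieldnames, restval='')  # blanks for missing tags
writer.writeheader()
writer.writerows(incidents)
print(fh.getvalue())
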
Example #16
def make_csv():
    data_files = os.listdir('scraped_data')
    fieldnames_by_locality = get_all_variables_by_locality()

    for zone_type in ['epci']:
        print "Make %s csv..." % zone_type

        locality_data_files = [
            data_file for data_file in data_files if zone_type in data_file
        ]

        variables_mapping = {
            'name': u'nom',
            'year': u'année',
            'zone_type': u'type de zone administrative',
            'population': u'population',
            'insee_code': u'cog (code officiel géographique)',
            'url': u'url'
        }

        fieldnames = ['year', 'zone_type', 'name', 'population', 'insee_code', 'url'] \
            + sorted(fieldnames_by_locality[zone_type].keys())

        variables_mapping.update(fieldnames_by_locality[zone_type])

        if zone_type == 'epci':
            fieldnames.append('siren')

        with open(os.path.join('nosdonnees', zone_type + '_all.csv'),
                  'w') as output:
            csv_output = DictWriter(output,
                                    fieldnames=fieldnames,
                                    encoding='utf-8')

            csv_output.writerow(variables_mapping)

            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join('scraped_data',
                                              locality_data_file),
                                 encoding='utf-8') as input:
                    for line in input:
                        data = json.loads(line, encoding='utf-8')['data']
                        csv_output.writerow(data)
Example #17
def convert_pickled_pages_to_csv_dataset(in_file, out_pages_fpath, out_transitions_fpath, delete_in_file=True):
    pages_columns = set()
    transitions_columns = set()
    
    with open(in_file, 'r') as inf:
        while True:
            try:
                page = pickle.load(inf)
                pages_columns.update(page.as_dict().viewkeys())
                for trans in page.transitions:
                    transitions_columns.update(trans.as_dict({ "FROM_LABEL__%s" % l : 1
                                                              for l in page.labels }).viewkeys())
            except EOFError:
                break

    with open(out_pages_fpath, 'w') as pages_f, \
        open(out_transitions_fpath, 'w') as trans_f:
        pages_writer = DictWriter(pages_f,
                                  sorted(pages_columns),
                                  encoding = 'utf8')
        pages_writer.writeheader()
        trans_writer = DictWriter(trans_f,
                                  sorted(transitions_columns),
                                  encoding = 'utf8')
        trans_writer.writeheader()
        with open(in_file, 'r') as inf:
            while True:
                try:
                    page = pickle.load(inf)
                    pages_writer.writerow(page.as_dict())
                    for trans in page.transitions:
                        trans_writer.writerow(trans.as_dict({ "FROM_LABEL__%s" % l : 1
                                                             for l in page.labels }))
                except EOFError:
                    break

    if delete_in_file:
        os.remove(in_file)
Example #18
def parse_file(xml_file):
    print 'converting %s to csv' % xml_file
    # csv file name
    new_file_path = xml_file.replace('wdvc16', 'converted_wdvc16').replace('.xml', '.csv')
    print 'writing to %s' % new_file_path

    # page by page generator of the xml file
    xml_file_by_pages = page_stream_generator(xml_file)

    # columns
    columns = [u'page_title', u'page_ns', u'page_id',
               u'revision_id', u'revision_timestamp', u'revision_comment',
               u'revision_model', u'revision_format', u'revision_count',
               u'username', u'user_id', u'ip_address']

    with open(new_file_path, 'w') as csv_file:
        writer = DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()

        for xml_page in xml_file_by_pages:
            revisions_in_page = parse_page(xml_page)
            for page in revisions_in_page:
                writer.writerow(page)
Example #19
def make_csv():
    data_files = os.listdir("scraped_data")
    fieldnames_by_locality = get_all_variables_by_locality()

    for zone_type in ["city", "department", "epci", "region"]:
        print "Make %s csv..." % zone_type

        locality_data_files = [data_file for data_file in data_files if zone_type in data_file]

        variables_mapping = {
            "name": u"nom",
            "year": u"année",
            "zone_type": u"type de zone administrative",
            "population": u"population",
            "insee_code": u"cog (code officiel géographique)",
            "url": u"url",
        }

        fieldnames = ["year", "zone_type", "name", "population", "insee_code", "url"] + sorted(
            fieldnames_by_locality[zone_type].keys()
        )

        variables_mapping.update(fieldnames_by_locality[zone_type])

        if zone_type == "epci":
            fieldnames.append("siren")

        with open(os.path.join("nosdonnees", zone_type + "_all.csv"), "w") as output:
            csv_output = DictWriter(output, fieldnames=fieldnames, encoding="utf-8")

            csv_output.writerow(variables_mapping)

            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join("scraped_data", locality_data_file), encoding="utf-8") as input:
                    for line in input:
                        data = json.loads(line, encoding="utf-8")["data"]
                        csv_output.writerow(data)
Example #20
class SplitCSVPipeline(object):
    def open_spider(self, spider):
        self.links_file = open('links.csv', 'wb')
        self.results_file = open('results.csv', 'wb')

        self.links_writer = DictWriter(self.links_file,
                                       ['source', 'destination'])
        self.results_writer = DictWriter(self.results_file,
                                         ['url', 'status', 'next'])

        self.links_writer.writeheader()
        self.results_writer.writeheader()

    def close_spider(self, spider):
        self.results_file.close()
        self.links_file.close()

    def process_item(self, item, spider):
        if isinstance(item, Link):
            self.links_writer.writerow(item)
        if isinstance(item, Result):
            self.results_writer.writerow(item)

        return item
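
The pipeline hands Scrapy Item objects straight to writerow, which works because Item implements the mapping interface. The files are opened 'wb' (Python 2); a Python 3 sketch of the split-writer idea with plain dicts and hypothetical paths:

from csv import DictWriter

class SplitCSV(object):
    """Route dict items to one of two CSV files by their keys."""

    def __init__(self, links_path, results_path):
        self.links_file = open(links_path, 'w', newline='')
        self.results_file = open(results_path, 'w', newline='')
        self.links = DictWriter(self.links_file, ['source', 'destination'])
        self.results = DictWriter(self.results_file, ['url', 'status', 'next'])
        self.links.writeheader()
        self.results.writeheader()

    def write(self, item):
        # link items carry 'source'/'destination'; everything else is a result
        writer = self.links if 'source' in item else self.results
        writer.writerow(item)

    def close(self):
        self.links_file.close()
        self.results_file.close()
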
Example #21
def make_csv():
    data_files = os.listdir('scraped_data')
    fieldnames_by_locality = get_all_variables_by_locality()

    for zone_type in ['city', 'epci', 'department', 'region']:
        print "Make %s csv..." % zone_type

        locality_data_files = [data_file for data_file in data_files if zone_type in data_file]

        variables_mapping = {
            'name': u'nom',
            'year': u'année',
            'zone_type': u'type de zone administrative',
            'population': u'population',
            'insee_code': u'cog (code officiel géographique)',
            'url': u'url'
        }

        fieldnames = ['year', 'zone_type', 'name', 'population', 'insee_code', 'url'] \
            + sorted(fieldnames_by_locality[zone_type].keys())

        variables_mapping.update(fieldnames_by_locality[zone_type])

        if zone_type == 'epci':
            fieldnames.append('siren')

        with open(os.path.join('nosdonnees', zone_type + '_all.csv'), 'w') as output:
            csv_output = DictWriter(output, fieldnames=fieldnames, encoding='utf-8')

            csv_output.writerow(variables_mapping)

            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join('scraped_data', locality_data_file), encoding='utf-8') as input:
                    for line in input:
                        data = json.loads(line, encoding='utf-8')['data']
                        csv_output.writerow(data)
with open("/tmp/coowned_by_state.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=["pep_company_name", "pep_company_link"] + fieldnames)

    w.writeheader()
    coowned_by_state = 0
    for c in Company.objects.filter(state_company=True).nocache().iterator():
        edrpou = c.edrpou.lstrip("0")
        if edrpou and edrpou in smida_owner_records:
            coowned_by_state += 1
            for l in smida_owner_records[edrpou]:
                rec = l.copy()
                rec["pep_company_name"] = c.name_uk
                rec["pep_company_link"] = u"{}{}".format(settings.SITE_URL, c.get_absolute_url())
                smida_indirect_records[l["EDRPOU"].strip().lstrip("0")].append(rec)

                w.writerow(rec)

    print("Coowned by state: {}".format(coowned_by_state))


with open("/tmp/coowned_indirectly_by_state.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=["pep_company_name", "pep_company_link"] + fieldnames)

    w.writeheader()
    coowned_indirectly_by_state = 0
    for edrpou in smida_indirect_records:
        coowned_indirectly_by_state += 1
        for l in smida_indirect_records[edrpou]:
            w.writerow(l)

    print("Coowned indirectly by state: {}".format(coowned_indirectly_by_state))
Example #23
        if p2c.declarations:
            from_declaration = True
            years = set(
                Declaration.objects.filter(
                    pk__in=p2c.declarations).values_list("year", flat=True))
        else:
            from_declaration = False  # otherwise unbound in this branch
            years = map(
                lambda x: x.year,
                filter(None, [
                    p2c.date_established, p2c.date_finished, p2c.date_confirmed
                ]))
            if years:
                years = map(unicode, range(min(years), max(years) + 1))

        w.writerow({
            "pep": p2c.from_person.full_name,
            "url": "https://pep.org.ua{}".format(
                p2c.from_person.get_absolute_url()),
            "company_name": unicode(p2c.to_company),
            "edrpou": p2c.to_company.edrpou,
            "years": ", ".join(sorted(years)),
            "from_declaration": from_declaration,
            "person_type": ("owner" if u"бенеф" in p2c.relationship_type_uk.lower()
                            else "founder"),
        })
Example #24
        itemtype,
        'title':
        g.value(book, DC.title),
        'date':
        g.value(book, DC.date) or '',  # not all have dates
        'tags':
        ', '.join(tags),
        '# tags':
        len(tags),
        '# tags ending in Y':
        len([t for t in tags if t.endswith('Y')])
    })

items = sorted(items, key=lambda k: k['# tags ending in Y'], reverse=True)

# generate csv file name based on input file
filebase, ext = os.path.splitext(os.path.basename(args.filename))
csv_filename = '%s.csv' % filebase

with open(csv_filename, 'w') as csvfile:
    # write byte-order mark so the utf-8 file opens correctly in Excel
    csvfile.write(codecs.BOM_UTF8)
    fieldnames = [
        'identifier', 'type', 'title', 'date', '# tags', '# tags ending in Y',
        'tags'
    ]
    writer = DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for item in items:
        writer.writerow(item)
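
Writing codecs.BOM_UTF8 by hand is the Python 2 route to a byte-order mark; on Python 3 the utf-8-sig codec emits it for you. A sketch with a hypothetical path and an illustrative subset of the fields:

from csv import DictWriter

fieldnames = ['identifier', 'title']
with open('books.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
    writer = DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({'identifier': '1', 'title': 'Example'})
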
Example #25
    w.writeheader()
    for c2c in Company2Country.objects.filter(
            to_country__name_uk__in=countries_list).select_related(
                "to_country"):
        related = c2c.from_company.all_related_persons

        def joiner(persons):
            return u"\n".join([
                u"{}, https://pep.org.ua{}".format(p.full_name,
                                                   p.get_absolute_url())
                for p in persons
            ])

        w.writerow({
            "country": c2c.to_country.name_uk,
            "company_name": c2c.from_company.name_uk,
            "company_code": c2c.from_company.edrpou,
            "company_url": "https://pep.org.ua{}".format(
                c2c.from_company.get_absolute_url()),
            "founders": joiner(related["founders"]),
            "managers": joiner(related["managers"]),
            "rest": joiner(related["rest"]),
            "sanctions": joiner(related["sanctions"]),
        })
Example #26
from tqdm import tqdm

activate(settings.LANGUAGE_CODE)

with open("/tmp/positions.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=["person", "relation", "company", "url"])

    w.writeheader()

    for p2c in tqdm(Person2Company.objects.all().select_related(
            "from_person", "to_company").nocache().iterator()):
        w.writerow({
            "person": p2c.from_person.full_name,
            "relation": p2c.relationship_type,
            "company": p2c.to_company.name,
            "url": "https://pep.org.ua{}".format(
                p2c.from_person.get_absolute_url()),
        })

with open("/tmp/relations.csv", "w") as fp:
    w = DictWriter(fp,
                   fieldnames=[
                       "company1", "relation", "back_relation", "company2",
                       "url"
                   ])

    w.writeheader()

    for c2c in tqdm(Company2Company.objects.all().select_related(
Example #27
            if dates_1[0] is None:
                dates_1[0] = date(1991, 1, 1)

            if dates_2[0] is None:
                dates_2[0] = date(1991, 1, 1)

            if dates_1[1] is None:
                dates_1[1] = date.today()

            if dates_2[1] is None:
                dates_2[1] = date.today()

            overlap = (dates_1[0] <= dates_2[1] and dates_2[0] <= dates_1[1])

            if not overlap:
                w.writerow({
                    "company": company,
                    "person1": p2p.from_person,
                    "person2": p2p.to_person,
                    "p1_to_p2": p2p.from_relationship_type,
                    "p1_to_c": personnel[p2p.from_person_id].relationship_type_uk,
                    "p2_to_c": personnel[p2p.to_person_id].relationship_type_uk,
                    "p1_from": dates_1[0],
                    "p1_to": dates_1[1],
                    "p2_from": dates_2[0],
                    "p2_to": dates_2[1],
                })

                p2p.delete()
Example #28
for d in Declaration.objects.filter(nacp_declaration=True, confirmed="a").nocache().iterator():
    data = d.source["nacp_orig"]
    if isinstance(data.get("step_9"), dict):
        for cash_rec in data["step_9"].values():
            if not isinstance(cash_rec, dict):
                continue

            if (cash_rec.get("country", "1") or "1") == "1":
                continue

            rec_to_export = {
                "company_name": cash_rec.get("name"),
                "legalForm": cash_rec.get("legalForm"),
                "country": countries[cash_rec.get("country", "1") or "1"],
                "en_name": cash_rec.get("en_name"),
                "location": cash_rec.get("location"),
                "en_address": cash_rec.get("en_address"),
                "phone": cash_rec.get("phone"),
                "address": cash_rec.get("address"),
                "mail": cash_rec.get("mail"),
                "company_code": cash_rec.get("beneficial_owner_company_code"),
                "owner": "DECLARANT" if cash_rec.get("person") == "1" else "FAMILY"
            }

            if _search_db(rec_to_export) is None:
                w.writerow(rec_to_export)
            else:
                print(u"Company {} found in db, skippings".format(
                    rec_to_export["company_name"])
                )
Example #29
def write_csv(fieldnames, rows, path):
    with open(path, 'wb') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)
Example #30
#         print(h.HITId)
#         currhits[h.HITId] = h
#         print('{}: {}'.format(len(currhits), currhits))
#     # get_all_hits iterates through all your current HITs, grabbing 100 at a time
#     # best to break as soon as you get all the HITIds in your group
#     if len(currhits) == len(hitids):
#         break

currhits = {h.HITId: h for h in mtc.get_all_hits() if h.HITId in hitids}
print('{} Current HITs: {}'.format(len(currhits), sorted(currhits.keys())))

process_assignments(assignments, all_results, currhits)
outkeys.extend(list(sorted(answer_keys)))

# Structure of hits
# foo.Amount                        foo.Expiration                    foo.IntegerValue                  foo.QualificationTypeId
# foo.AssignmentDurationInSeconds   foo.FormattedPrice                foo.Keywords                      foo.RequesterAnnotation
# foo.AutoApprovalDelayInSeconds    foo.HIT                           foo.LocaleValue                   foo.RequiredToPreview
# foo.Comparator                    foo.HITGroupId                    foo.MaxAssignments                foo.Reward
# foo.Country                       foo.HITId                         foo.NumberOfAssignmentsAvailable  foo.Title
# foo.CreationTime                  foo.HITReviewStatus               foo.NumberOfAssignmentsCompleted
# foo.CurrencyCode                  foo.HITStatus                     foo.NumberOfAssignmentsPending    foo.expired
# foo.Description                   foo.HITTypeId                     foo.QualificationRequirement

with open(args.resultsfile, 'w') as outfile:
    dw = DictWriter(outfile, fieldnames=outkeys, delimiter='\t')
    dw.writeheader()

    for row in all_results:
        dw.writerow(row)
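
DictWriter accepts the same formatting parameters as csv.writer, so delimiter='\t' above produces a tab-separated file. The same knob standalone:

from csv import DictWriter
from io import StringIO

out = StringIO()
writer = DictWriter(out, ['hit_id', 'answer'], delimiter='\t')
writer.writeheader()
writer.writerow({'hit_id': 'H1', 'answer': 'yes'})
print(out.getvalue())
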
                    "companyName": companyName,
                    "industries": industries,
                    "totalViews": totalViews,
                    "url": url
                }
                totalJobs.append(item)
    except:
        pass


if __name__ == '__main__':
    """ Provide the location and companyName """
    location = "New York"
    companyName = "Airbnb"
    getjobs(companyName, location)
    print "Total jobs got ", len(totalJobs)
    file = open('finalData.csv', 'wb')
    fields = [
        'jobTitle', 'companyName', 'location', 'postedTime', 'totalViews',
        'jobDescription', 'industries', 'employmentType', 'experience',
        'jobFunctions', 'url'
    ]
    csvfile = DictWriter(file,
                         fieldnames=fields,
                         quoting=QUOTE_ALL,
                         encoding="utf-8")
    csvfile.writeheader()
    for i in totalJobs:
        csvfile.writerow(i)
    file.close()

Example #32
if __name__ == '__main__':
    print("Exporting tarefas to CSV...")
    with open(TAREFAS_DIR + '/../csv/data_exported.csv', 'wb') as fout:
        csv_writer = DictWriter(
            fout,
            fieldnames=[
                'id_tarefa', 'num_tarefa', 'titulo_tarefa', 'tipo_tarefa',
                'data_cadastro_tarefa', 'sistema_tarefa', 'data_inicio_tarefa',
                'subsistema_tarefa', 'data_deadline_tarefa',
                'aberta_por_tarefa', 'localizacao_analista_tarefa',
                'situacao_tarefa', 'horas_trabalhadas_tarefa',
                'gerente_relacionamento_tarefa', 'num_prioridade_tarefa',
                'andamento_tarefa', 'prioridade_tarefa', 'dados_build_log',
                'data_cadastro_log', 'atividade_log', 'situacao_log',
                'andamento_log', 'horas_trabalhadas_log', 'aberto_por_log',
                'revisao_svn_log'
            ])
        csv_writer.writeheader()
        for tarefa_filename in os.listdir(TAREFAS_DIR):
            id_tarefa = re.findall(r'(\d+)', tarefa_filename)[0]
            tarefa_filepath = TAREFAS_DIR + '/' + tarefa_filename
            with io.open(tarefa_filepath, 'r', encoding='utf-8') as fin:
                list_tarefa_logs = __to_list_tarefa_logs(fin.read())
                for tarefa_log in list_tarefa_logs:
                    tarefa_log.update({'id_tarefa': id_tarefa})
                    csv_writer.writerow(tarefa_log)

    print("Done!")
Example #33
                page('#parent-fieldname-contactPhone').text(),
                page('a.email').attr('href').replace('mailto:', ''),
                page('#parent-fieldname-eventUrl').attr('href') or ''
            ))),
            ('meta_submitter_email', '*****@*****.**'),
        )))

    with open("events.p", "wb") as dumpfile:
        pickle.dump(events, dumpfile)

# Write output
with open('output.csv', 'w') as csvfile:
    writer = DictWriter(csvfile, fieldnames=events[0].keys())
    writer.writeheader()
    for event in events:
        writer.writerow(event)

# Submit events
for event in events:
    util.submit_event(
        email=event['meta_submitter_email'],
        title=event['title'],
        description=event['content_description'],
        location=event['location'],
        start_date=event['start'].split('T')[0],
        start_time=event['start'].split('T')[1][:5],
        end_time=event['end'].split('T')[1][:5],
        base_url=base_url
    )

# Publish events & close tickets
Example #34
from django.db import models
from unicodecsv import DictWriter
from core.models import Company

fieldnames = ["id", "url", "name", "head_is_pep"]

with open("/tmp/orphaned_companies.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=fieldnames)
    w.writeheader()
    for c in Company.objects.annotate(cnt=models.Count(
            "from_persons__from_person",
            distinct=True,
            filter=models.Q(from_persons__from_person__is_pep=True))).filter(
                cnt=0):
        w.writerow({
            "id": c.id,
            "url": "https://pep.org.ua/" + c.get_absolute_url(),
            "name": c.name_uk,
            "head_is_pep": c.state_company
        })
Example #35
from core.models import Person2Person
from tqdm import tqdm
from unicodecsv import DictWriter

fp = open("/tmp/p2p.csv", "w")
w = DictWriter(fp, fieldnames=["person1", "person1_is", "person2", "person2_is", "proofs"])


qs = Person2Person.objects.select_related("from_person", "to_person").nocache()
w.writeheader()

for p2p in tqdm(qs.iterator(), total=qs.count()):
    w.writerow({
        "person1": p2p.from_person,
        "person2": p2p.to_person,
        "person1_is": p2p.get_from_relationship_type_display(),
        "person2_is": p2p.get_to_relationship_type_display(),
        "proofs": "\n".join([p.proof_title for p in p2p.proofs.all()])
    })

fp.close()
Example #36
def dump_csv(table, name):
    with open(name, 'w') as fh:
        writer = DictWriter(fh, fieldnames=table.columns)
        writer.writeheader()
        for row in table:
            writer.writerow(row)
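
dump_csv works for any object that exposes a columns attribute and yields mapping rows when iterated (the dataset library's tables have this shape, for instance). A minimal stand-in that satisfies the contract, assuming dump_csv from this example is in scope:

class Table(object):
    columns = ['id', 'name']  # header order

    def __iter__(self):  # rows as dicts
        return iter([{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}])

dump_csv(Table(), 'demo.csv')
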
Example #37
def write_csv(fieldnames, rows, path):
    with open(path, 'wb') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)