def find_all_matching_officers(min_percentage=0.75):
    notable_officers = set()
    officer_fingerprints = get_all_officer_fingerprints()
    sys.stderr.write("\nFinding matches...\n")
    writer = DictWriter(sys.stdout, [
        'Full Name (from persons)',
        'officer_id (from npo_officers)'
    ])
    writer.writeheader()

    for i, data in enumerate(gdocs_persons()):
        fingerprint = make_fingerprint(data['Full Name'])
        matching_ids = find_matching_officers(
            fingerprint,
            officer_fingerprints,
            excluded_ids=notable_officers,
            min_percentage=min_percentage
        )

        for officer_id in matching_ids:
            writer.writerow({
                'Full Name (from persons)': data['Full Name'],
                'officer_id (from npo_officers)': officer_id,
            })

        notable_officers.update(matching_ids)
        sys.stderr.write("\r%d" % (i + 1))
        sys.stderr.flush()

    sys.stderr.write("\nDone\n")
Example #2
def export_aliases(project, fh):
    """ Dump a list of all entity names to a CSV file. The table will contain the 
    active name of each entity, and one of the other existing names as an alias. """

    writer = DictWriter(fh, ['entity_id', 'alias', 'canonical'])
    writer.writeheader()

    alias = aliased(EntityProperty)
    canonical = aliased(EntityProperty)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(canonical)
    q = q.filter(Entity.project_id==project.id)
    q = q.filter(alias.entity_id!=None)
    q = q.filter(alias.name=='name')
    q = q.filter(canonical.name=='name')
    q = q.filter(canonical.active==True)
    q = q.add_columns(canonical.value_string.label('canonical'))
    for row in q.all():
        #if row.alias == row.canonical:
        #    continue
        writer.writerow({
            'entity_id': str(row.entity_id),
            'alias': row.alias,
            'canonical': row.canonical
        })
Example #3
def studentcsv():
    f = StringIO()
    writer = DictWriter(f,STUDENT_FIELDS)
    writer.writerow( dict( (x,x) for x in STUDENT_FIELDS) )
    for student in models.Student.query.all():
        if student.project_id:
            record = None
            try:
                project = student.project
                record = {STUDENT_FIELDS[0]: student.project_id,
                          STUDENT_FIELDS[1]: student.project.category.name,
                          STUDENT_FIELDS[2]: fx(student.firstname),
                          STUDENT_FIELDS[3]: fx(student.lastname),
                          STUDENT_FIELDS[4]: student.grade,
                          STUDENT_FIELDS[5]: student.gender,
                          STUDENT_FIELDS[6]: student.school.name,
                          STUDENT_FIELDS[7]: str(project.individual),
                          STUDENT_FIELDS[8]: str(project.forms.first().vafa or
                                            project.forms.first().vafb),
                          STUDENT_FIELDS[9]: str(project.forms.first().hsf),
                          STUDENT_FIELDS[10]: str(project.forms.first().phbaf)}
            except AttributeError as error:
                app.logger.error('ProjID:%s - ID:%s - %s %s\n%s\n%s' % 
                        (student.id, student.project.id, student.firstname,
                         student.lastname,vars(student.project),
                         error))
            if record:
                try:
                    writer.writerow(record)
                except UnicodeEncodeError:
                    app.logger.error("Unicode Error:\n%s"%record)

    return f.getvalue()
Example #4
    def handle(self, *args, **options):
        if len(args) != 2:
            raise CommandError(
                'usage: python manage.py dump_results '
                '<template_file_path> '
                '<results_csv_file_path>'
            )

        # Get paths from args, and normalize them to absolute paths:
        template_file_path, results_csv_file_path = map(os.path.abspath, args)

        try:
            template = HitTemplate.objects.get(name=template_file_path)
        except ObjectDoesNotExist:
            sys.exit('There is no matching <template_file_path>.')

        completed_hits = template.hit_set.filter(completed=True)
        if not completed_hits.exists():
            sys.exit('There are no completed HITs.')

        fieldnames, rows = results_data(completed_hits)
        with open(results_csv_file_path, 'wb') as fh:
            writer = DictWriter(fh, fieldnames)
            writer.writeheader()
            for row in rows:
                writer.writerow(row)
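The command above opens the output with 'wb', the Python 2 idiom for csv writers. On Python 3 the csv module wants a text-mode handle opened with newline=''; a minimal sketch with a made-up path and field names:

import csv

# Python 3 variant (hypothetical path and fieldnames): text mode plus
# newline='' so the csv module controls line endings itself.
with open('results.csv', 'w', newline='', encoding='utf-8') as fh:
    writer = csv.DictWriter(fh, fieldnames=['hit_id', 'answer'])
    writer.writeheader()
    writer.writerow({'hit_id': '1', 'answer': 'yes'})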
Example #5
    def write(self, file_name, row):
        with self.lock:
            if file_name not in self.fhs:
                self.fhs[file_name] = open(make_path(file_name), 'wb')
                dw = DictWriter(self.fhs[file_name], row.keys())
                self.writers[file_name] = dw
                dw.writeheader()

            self.writers[file_name].writerow(row)
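The write() method above assumes an owner object carrying a lock plus per-file caches of handles and writers. A minimal sketch of such a wrapper; the class name is invented and the attribute names are taken from the method itself (make_path is assumed to exist elsewhere):

import threading


class CSVSink(object):
    # Hypothetical container for the method above: one open file handle and
    # one DictWriter cached per output file name, guarded by a lock.
    def __init__(self):
        self.lock = threading.Lock()
        self.fhs = {}
        self.writers = {}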
Example #7
    def open_spider(self, spider):
        self.links_file = open('links.csv', 'wb')
        self.results_file = open('results.csv', 'wb')

        self.links_writer = DictWriter(self.links_file,
                                       ['source', 'destination'])
        self.results_writer = DictWriter(self.results_file,
                                         ['url', 'status', 'next'])

        self.links_writer.writeheader()
        self.results_writer.writeheader()
Example #8
def export_aliases(project, path):
    """ Dump a list of all entity names to a CSV file. The table will contain the 
    active name of each entity, and one of the other existing names as an alias. """
    with open(path, 'w') as fh:
        writer = DictWriter(fh, ['entity_id', 'alias', 'canonical', 'schemata'])
        writer.writeheader()
        q = Entity.all().filter_by(same_as=None)
        q = q.filter(Entity.project==project)
        for i, entity in enumerate(q):
            export_entity(entity, writer)
            if i % 100 == 0:
                log.info("Dumped %s entity names...", i)
Example #9
def export_aliases(project, path):
    """ Dump a list of all entity names to a CSV file. The table will contain the 
    active name of each entity, and one of the other existing names as an alias. """
    with open(path, 'w') as fh:
        writer = DictWriter(fh,
                            ['entity_id', 'alias', 'canonical', 'schemata'])
        writer.writeheader()
        q = Entity.all().filter_by(same_as=None)
        q = q.filter(Entity.project == project)
        for i, entity in enumerate(q):
            export_entity(entity, writer)
            if i % 100 == 0:
                log.info("Dumped %s entity names...", i)
Example #10
    def load_data(self, options):
        save_data = self.settings.get("__save_data__", False)
        if save_data:
            options['full_record'] = True
            try:
                os.makedirs("./saved_data")
                LOG.info("Saving data to %s.", os.path.abspath("./saved_data"))
            except OSError as exc:
                if exc.errno == errno.EEXIST and os.path.isdir("./saved_data"):
                    pass
                else:
                    raise

        if self.settings['protocol_version'] == '2':
            if self.settings['group_dn']:
                users = self.query_group(options)
            else:
                users = self.query_objects(options)
        else:
            if self.settings['group_dn']:
                users = self.query_group_paged(options)
            else:
                users = self.query_objects_paged(options)

        if save_data:
            data = []
            keys = set()
            for user in users:
                # Note: Not all user dicts contain all the fields. So, need to loop over
                #       all the users to make sure we don't miss any fields.
                keys.update(user.keys())
                data.append(user)

            used_keys = set(self.ldap_query_fields)
            unused_keys = set(keys) - used_keys
            if unused_keys:
                keys = sorted(used_keys) + ['unmapped ->'
                                            ] + sorted(unused_keys)
            else:
                keys = sorted(used_keys)

            with open('./saved_data/ldap.csv', 'w') as save_file:
                writer = DictUnicodeWriter(save_file, keys)
                writer.writeheader()
                writer.writerows(data)

            users = data

        for user in users:
            yield user
Example #11
    def write_csv(self, outputdir, timestamp, items=None):
        path = os.path.join(outputdir,
            self.filename('csv', timestamp, **self.filter_kwargs))

        if items is None:
            items = self.get_items()

        with open(path, 'w') as csvfile:
            writer = DictWriter(csvfile, self.get_fields())
            writer.writeheader()
            for row in items:
                writer.writerow(row)

        return self
Example #12
def tocsv():
    f = StringIO()
    writer = DictWriter(f,CSV_FIELDS)
    writer.writerow( dict( (x,x) for x in CSV_FIELDS) )
    districts = models.District.query.order_by('name').all()
    for district in districts:
        schools = district.schools.order_by('name').all()
        for school in schools:
            students = school.students.join(models.Project).order_by('title')
            students = students.filter(models.Student.team_leader==True).all()
            for student in students:
                record = None
                try:
                    record = {CSV_FIELDS[0]: student.project.id,
                              CSV_FIELDS[1]: fx("%s %s"%(student.firstname,
                                                    student.lastname)),
                              CSV_FIELDS[4]: fx(student.project.title),
                              CSV_FIELDS[5]: fx(student.project.category.name),
                              CSV_FIELDS[6]: fx(student.project.division),
                              CSV_FIELDS[7]: student.school.name,
                              CSV_FIELDS[8]: student.school.district.name,
                              CSV_FIELDS[9]: fx("%s %s"%(student.sponsor.firstname,
                                                    student.sponsor.lastname)),
                              CSV_FIELDS[10]: student.project.forms_submitted,
                              CSV_FIELDS[11]: student.project.notes,
                              }
                    team = student.project.student
                    team = team.filter(models.Student.team_leader==False).limit(2)
                    team = team.all()
                    i = 2
                    for student in team:
                        record[CSV_FIELDS[i]]= fx("%s %s"%(student.firstname,
                                                        student.lastname))
                        i += 1
                except AttributeError as error:
                    app.logger.error('ProjID:%s - ID:%s - %s %s\n%s\n%s' % 
                            (student.id, student.project.id, student.firstname,
                             student.lastname,vars(student.project),
                             error))


                if record:
                    try:
                        writer.writerow(record)
                    except UnicodeEncodeError:
                        app.logger.error("Unicode Error:\n%s" % record)

    return f.getvalue()
Example #13
def writetocsv(xmlfile=None, month=None, year=None, outfile='../data/data.csv'):
    if xmlfile is None:
        raise Exception, "No XML file passed"
    if month is None:
        raise Exception, "No month passed"
    if year is None:
        raise Exception, "No year passed"
    xmldata = etree.parse(xmlfile)
    csvwriter = None
    csvfile = open(outfile, 'a')
    for incident in xmldata.iter('DATA'):
        data = {'month': month, 'year': year}
        for field in incident.iterchildren():
            data[field.tag] = field.text
        if not csvwriter:
            csvwriter = DictWriter(csvfile, fieldnames=data.keys())
            csvwriter.writeheader()
        csvwriter.writerow(data)
    csvfile.close()
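A call sketch for writetocsv(); the XML path, month, year and output path are made up for illustration:

# Hypothetical invocation: appends every <DATA> record from the file,
# tagged with the given month and year, to the output CSV.
writetocsv(xmlfile='incidents_2016_01.xml', month=1, year=2016,
           outfile='../data/january.csv')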
Example #14
    def edr_export(self, request):
        data = []

        for rec_id in request.POST.getlist("iswear"):
            meta_id = request.POST.get("company_%s_id" % rec_id)
            res = EDRPOU.get(id=meta_id)
            if res:
                rec = res.to_dict()

                if isinstance(rec.get("founders"), list):
                    rec["founders"] = ";;;".join(rec["founders"])
                data.append(rec)

        if not data:
            self.message_user(request, "Нічого експортувати")  # "Nothing to export"
            return redirect(reverse("admin:edr_search"))

        fp = StringIO()
        w = DictWriter(fp, fieldnames=data[0].keys())
        w.writeheader()
        w.writerows(data)
        payload = fp.getvalue()
        fp.close()

        response = HttpResponse(payload, content_type="text/csv")

        response[
            "Content-Disposition"] = "attachment; filename=edr_{:%Y%m%d_%H%M}.csv".format(
                datetime.datetime.now())

        response["Content-Length"] = len(response.content)

        return response
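Because Django's HttpResponse is file-like (it exposes write()), the intermediate StringIO buffer is optional; a sketch of writing straight into the response, assuming the same data list as above:

# Sketch: let DictWriter write into the HttpResponse directly.
response = HttpResponse(content_type="text/csv")
response["Content-Disposition"] = "attachment; filename=edr.csv"
w = DictWriter(response, fieldnames=data[0].keys())
w.writeheader()
w.writerows(data)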
Example #15
def export_aliases(project, fh):
    """ Dump a list of all entity names to a CSV file. The table will contain
    the active name of each entity, and one of the other existing names as an
    alias. """

    writer = DictWriter(fh, ['entity_id', 'schema', 'alias', 'canonical'])
    writer.writeheader()

    alias = aliased(Property)
    canonical = aliased(Property)
    schema = aliased(Schema)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(schema)
    q = q.join(canonical)
    q = q.filter(Entity.project_id == project.id)
    q = q.filter(alias.entity_id != None)  # noqa
    q = q.filter(alias.name == 'name')
    q = q.filter(canonical.name == 'name')
    q = q.filter(canonical.active == True)  # noqa
    q = q.add_columns(canonical.value_string.label('canonical'))
    q = q.add_columns(schema.name.label('schema'))
    for row in q.all():
        writer.writerow({
            'entity_id': str(row.entity_id),
            'schema': row.schema,
            'alias': row.alias,
            'canonical': row.canonical
        })
Example #16
def main():
    prs = argparse.ArgumentParser()

    prs.add_argument('--count', type=int, default=100)

    prs.add_argument('file', type=file)

    args = prs.parse_args()

    count = args.count
    assert count > 0
    path = os.path.abspath(args.file.name)
    root, ext = os.path.splitext(path)
    new_path = '%s_trimmed_%s%s' % (root, count, ext)

    reader = DictReader(open(path))
    new_entries = []
    for i in range(count):
        new_entries.append(next(reader))

    with open(new_path, 'w') as new_file:
        writer = DictWriter(new_file, reader.unicode_fieldnames)
        writer.writeheader()
        writer.writerows(new_entries)

    print open(new_path).read()
Example #17
def from_files():
    basedir = "/Users/rikhoekstra/surfdrive/Shared/Documents/NIOD2017/International_MIgration"
    toread = [fl for fl in os.listdir(basedir)]
    result = []

    for fl in toread:
        infl = open(os.path.join(basedir, fl), 'rU')
        txt = infl.read()
        recs = txt.split("\n\n")[1:]
        for r in recs:
            rec = r.split('\n')
            res = {}
            for l in rec:
                item = l.split(' - ')
                #            print item
                #            for item in splitted:
                #           import pdb; pdb.set_trace()
                if len(item) > 1 and item[0].strip() in [
                        'AU', 'TI', 'PY', 'JO'
                ]:
                    res[item[0].strip()] = item[1].strip()
            result.append(res)

    flout = open('wileyrecs.csv', 'w')

    w = DictWriter(flout, ['AU', 'TI', 'PY', 'JO'])
    w.writeheader()
    w.writerows(result)
    flout.close()
    print('written: ', flout.name)
    return result
Example #18
def find_all_matching_officers(min_percentage=0.75):
    notable_officers = set()
    officer_fingerprints = get_all_officer_fingerprints()
    sys.stderr.write("\nFinding matches...\n")
    writer = DictWriter(
        sys.stdout,
        ['Full Name (from persons)', 'officer_id (from npo_officers)'])
    writer.writeheader()

    for i, data in enumerate(gdocs_persons()):
        fingerprint = make_fingerprint(data['Full Name'])
        matching_ids = find_matching_officers(fingerprint,
                                              officer_fingerprints,
                                              excluded_ids=notable_officers,
                                              min_percentage=min_percentage)

        for officer_id in matching_ids:
            writer.writerow({
                'Full Name (from persons)': data['Full Name'],
                'officer_id (from npo_officers)': officer_id,
            })

        notable_officers.update(matching_ids)
        sys.stderr.write("\r%d" % (i + 1))
        sys.stderr.flush()

    sys.stderr.write("\nDone\n")
Example #19
def render_non_html(encoding, querySet):
    '''Renders non-html formats and returns an appropriate HttpResponse'''

    if encoding == 'csv':
        vals = querySet.values()
        response = HttpResponse(mimetype='text/csv')
        response['Content-Disposition'] = \
                'attachment; filename=genesets%d.csv' % len(vals)
        csvW = DictWriter(response, GenesetFieldNames)
        fieldsDict = {}
        for k in GenesetFieldNames:
            fieldsDict[k] = k
        csvW.writerow(fieldsDict)
        csvW.writerows(vals)
    elif encoding == 'xml':
        response = HttpResponse(mimetype='text/xml')
        response['Content-Disposition'] = \
                'attachment; filename=genesets%d.xml' % len(querySet)
        serializers.serialize("xml", querySet, stream=response)
    elif encoding == "json":
        response = HttpResponse(mimetype='application/json')
        response['Content-Disposition'] = \
                'attachment; filename=genesets%d.js' % len(querySet)
        serializers.serialize("json", querySet, stream=response)

    return response
Example #20
def make_csv():
    data_files = os.listdir("scraped_data")
    fieldnames_by_locality = get_all_variables_by_locality()

    for zone_type in ["city", "department", "epci", "region"]:
        print "Make %s csv..." % zone_type

        locality_data_files = [data_file for data_file in data_files if zone_type in data_file]

        variables_mapping = {
            "name": u"nom",
            "year": u"année",
            "zone_type": u"type de zone administrative",
            "population": u"population",
            "insee_code": u"cog (code officiel géographique)",
            "url": u"url",
        }

        fieldnames = ["year", "zone_type", "name", "population", "insee_code", "url"] + sorted(
            fieldnames_by_locality[zone_type].keys()
        )

        variables_mapping.update(fieldnames_by_locality[zone_type])

        if zone_type == "epci":
            fieldnames.append("siren")

        with open(os.path.join("nosdonnees", zone_type + "_all.csv"), "w") as output:
            csv_output = DictWriter(output, fieldnames=fieldnames, encoding="utf-8")

            csv_output.writerow(variables_mapping)

            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join("scraped_data", locality_data_file), encoding="utf-8") as input:
                    for line in input:
                        data = json.loads(line, encoding="utf-8")["data"]
                        csv_output.writerow(data)
Example #21
def make_csv():
    data_files = os.listdir('scraped_data')
    fieldnames_by_locality = get_all_variables_by_locality()

    for zone_type in ['city', 'epci', 'department', 'region']:
        print "Make %s csv..." % zone_type

        locality_data_files = [data_file for data_file in data_files if zone_type in data_file]

        variables_mapping = {
            'name': u'nom',
            'year': u'année',
            'zone_type': u'type de zone administrative',
            'population': u'population',
            'insee_code': u'cog (code officiel géographique)',
            'url': u'url'
        }

        fieldnames = ['year', 'zone_type', 'name', 'population', 'insee_code', 'url'] \
            + sorted(fieldnames_by_locality[zone_type].keys())

        variables_mapping.update(fieldnames_by_locality[zone_type])

        if zone_type == 'epci':
            fieldnames.append('siren')

        with open(os.path.join('nosdonnees', zone_type + '_all.csv'), 'w') as output:
            csv_output = DictWriter(output, fieldnames=fieldnames, encoding='utf-8')

            csv_output.writerow(variables_mapping)

            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join('scraped_data', locality_data_file), encoding='utf-8') as input:
                    for line in input:
                        data = json.loads(line, encoding='utf-8')['data']
                        csv_output.writerow(data)
Example #22
def mappings_export(file, decided):
    """Export mappings to a CSV file."""
    writer = DictWriter(file, fieldnames=['left', 'right', 'judgement'])
    writer.writeheader()
    for mapping in Mapping.find_by_decision(decided):
        writer.writerow({
            'left': mapping.left_uid,
            'right': mapping.right_uid,
            'judgement': mapping.judgement
        })
Example #23
    def write_csv(self, outputdir, timestamp, items=None):
        path = os.path.join(
            outputdir, self.filename('csv', timestamp, **self.filter_kwargs))

        if items is None:
            items = self.get_items()

        with open(path, 'w') as csvfile:
            writer = DictWriter(csvfile, self.get_fields())
            writer.writeheader()
            for row in items:
                writer.writerow(row)

        return self
Example #24
def export_csv_table(archive, model, name):
    file_path = os.path.join(_make_export_path(), '%s.csv' % name)
    log.info("Exporting CSV to %s...", file_path)
    writer = None
    with open(file_path, 'w') as fh:
        for obj in session.query(model):
            row = obj.to_row()
            if writer is None:
                writer = DictWriter(fh, row.keys())
                writer.writeheader()
            writer.writerow(row)

    url = archive.upload_file(file_path, mime_type='text/csv')
    if url is not None:
        os.unlink(file_path)
Example #25
class SplitCSVPipeline(object):
    def open_spider(self, spider):
        self.links_file = open('links.csv', 'wb')
        self.results_file = open('results.csv', 'wb')

        self.links_writer = DictWriter(self.links_file,
                                       ['source', 'destination'])
        self.results_writer = DictWriter(self.results_file,
                                         ['url', 'status', 'next'])

        self.links_writer.writeheader()
        self.results_writer.writeheader()

    def close_spider(self, spider):
        self.results_file.close()
        self.links_file.close()

    def process_item(self, item, spider):
        if isinstance(item, Link):
            self.links_writer.writerow(item)
        if isinstance(item, Result):
            self.results_writer.writerow(item)

        return item
Example #26
        def stream():
            buffer_ = StringIO()
            writer = DictWriter(buffer_,
                                header_dict.keys(),
                                delimiter=",",
                                quoting=csv.QUOTE_MINIMAL)

            # Write Header Row
            data = read_and_flush(writer, buffer_, header_dict)
            yield data

            count = 0
            # Write CSV
            for row in scanResponse:
                count += 1
                rows_data = {
                    key: text_type(value)
                    for key, value in row['_source'].items()
                    if key in header_dict.keys()
                }

                data = read_and_flush(writer, buffer_, rows_data)
                yield data
Example #27
def convert_file(file_path):
    result_path = file_path.replace('/wdvc16', '/processed_wdvc16').replace('.xml', '.csv')
    print 'writing to %s...' % result_path

    xml_pages = stream_pages(file_path)

    fieldnames = [u'revision_id', u'revisions_in_group', u'revision_comment', u'revision_timestamp',
                  u'page_id', u'page_group', u'page_ns', u'page_title', 
                  u'anonimous_ip', u'user_id', u'username']

    with open(result_path, 'w') as csv_file:
        writer = DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for xml_page in tqdm(xml_pages):
            pages = parse_page(xml_page)
            for page in pages:
                writer.writerow(page)
Example #28
def make_csv():
    data_files = os.listdir('scraped_data')
    fieldnames_by_locality = get_all_variables_by_locality()

    for zone_type in ['epci']:
        print "Make %s csv..." % zone_type

        locality_data_files = [
            data_file for data_file in data_files if zone_type in data_file
        ]

        variables_mapping = {
            'name': u'nom',
            'year': u'année',
            'zone_type': u'type de zone administrative',
            'population': u'population',
            'insee_code': u'cog (code officiel géographique)',
            'url': u'url'
        }

        fieldnames = ['year', 'zone_type', 'name', 'population', 'insee_code', 'url'] \
            + sorted(fieldnames_by_locality[zone_type].keys())

        variables_mapping.update(fieldnames_by_locality[zone_type])

        if zone_type == 'epci':
            fieldnames.append('siren')

        with open(os.path.join('nosdonnees', zone_type + '_all.csv'),
                  'w') as output:
            csv_output = DictWriter(output,
                                    fieldnames=fieldnames,
                                    encoding='utf-8')

            csv_output.writerow(variables_mapping)

            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join('scraped_data',
                                              locality_data_file),
                                 encoding='utf-8') as input:
                    for line in input:
                        data = json.loads(line, encoding='utf-8')['data']
                        csv_output.writerow(data)
Example #29
    def report(self):
        trello_members, google_members, board_members = self.auditor.get_members(
        )
        all_members = set()
        all_members.update(trello_members)
        all_members.update(google_members)
        for members in board_members.values():
            all_members.update(members)

        with open(self.filename, 'wb+') as fp:
            csv = DictWriter(fp, ['name'] + list(all_members))
            csv.writeheader()
            board_members['google'] = google_members
            board_members['trello'] = trello_members

            for board, members in board_members.items():
                row = {member: (member in members) for member in all_members}
                row['name'] = board
                csv.writerow(row)
Example #30
def parse_file(xml_file):
    print 'converting %s to csv' % xml_file
    # csv file name
    new_file_path = xml_file.replace('wdvc16', 'converted_wdvc16').replace('.xml', '.csv')
    print 'writing to %s' % new_file_path

    # page by page generator of the xml file
    xml_file_by_pages = page_stream_generator(xml_file)

    # columns
    columns = [u'page_title', u'page_ns', u'page_id',
               u'revision_id', u'revision_timestamp', u'revision_comment',
               u'revision_model', u'revision_format', u'revision_count',
               u'username', u'user_id', u'ip_address']

    with open(new_file_path, 'w') as csv_file:
        writer = DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()

        for xml_page in xml_file_by_pages:
            revisions_in_page = parse_page(xml_page)
            for page in revisions_in_page:
                writer.writerow(page)
Example #31
                page('#parent-fieldname-description').text(),
                page('#parent-fieldname-text').text(),
                page('a.email').text(),
                page('#parent-fieldname-contactPhone').text(),
                page('a.email').attr('href').replace('mailto:', ''),
                page('#parent-fieldname-eventUrl').attr('href') or ''
            ))),
            ('meta_submitter_email', '*****@*****.**'),
        )))

    with open("events.p", "wb") as dumpfile:
        pickle.dump(events, dumpfile)

# Write output
with open('output.csv', 'w') as csvfile:
    writer = DictWriter(csvfile, fieldnames=events[0].keys())
    writer.writeheader()
    for event in events:
        writer.writerow(event)

# Submit events
for event in events:
    util.submit_event(
        email=event['meta_submitter_email'],
        title=event['title'],
        description=event['content_description'],
        location=event['location'],
        start_date=event['start'].split('T')[0],
        start_time=event['start'].split('T')[1][:5],
        end_time=event['end'].split('T')[1][:5],
        base_url=base_url
Example #32
def write_csv(fieldnames, rows, path):
    with open(path, 'wb') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)
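A minimal call sketch for the helper above; field names and rows are invented for illustration:

write_csv(['name', 'score'],
          [{'name': 'alice', 'score': 3}, {'name': 'bob', 'score': 5}],
          'scores.csv')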
Example #33
res = []
for p2c in Person2Company.objects.filter(to_company_id=63).prefetch_related("from_person"):
    for d in Declaration.objects.filter(nacp_declaration=True, person=p2c.from_person, confirmed="a").order_by("year"):
        res.append({
            "name": p2c.from_person.full_name,
            "year": d.year,
            "id": d.declaration_id.replace("nacp_", "", 1)
        })


 with open("/tmp/mp_decls.csv", "w") as fp:
     from unicodecsv import DictWriter
     w = DictWriter(fp, fieldnames=res[0].keys())
     w.writerows(res)
Example #34
#         print(h.HITId)
#         currhits[h.HITId] = h
#         print('{}: {}'.format(len(currhits), currhits))
#     # get_all_hits iterates through all your current HITs, grabbing 100 at a time
#     # best to break as soon as you get all the HITIds in your group
#     if len(currhits) == len(hitids):
#         break

currhits = {h.HITId: h for h in mtc.get_all_hits() if h.HITId in hitids}
print('{} Current HITs: {}'.format(len(currhits), sorted(currhits.keys())))

process_assignments(assignments, all_results, currhits)
outkeys.extend(list(sorted(answer_keys)))

# Structure of hits
# foo.Amount                        foo.Expiration                    foo.IntegerValue                  foo.QualificationTypeId
# foo.AssignmentDurationInSeconds   foo.FormattedPrice                foo.Keywords                      foo.RequesterAnnotation
# foo.AutoApprovalDelayInSeconds    foo.HIT                           foo.LocaleValue                   foo.RequiredToPreview
# foo.Comparator                    foo.HITGroupId                    foo.MaxAssignments                foo.Reward
# foo.Country                       foo.HITId                         foo.NumberOfAssignmentsAvailable  foo.Title
# foo.CreationTime                  foo.HITReviewStatus               foo.NumberOfAssignmentsCompleted
# foo.CurrencyCode                  foo.HITStatus                     foo.NumberOfAssignmentsPending    foo.expired
# foo.Description                   foo.HITTypeId                     foo.QualificationRequirement

with open(args.resultsfile, 'w') as outfile:
    dw = DictWriter(outfile, fieldnames=outkeys, delimiter='\t')
    dw.writeheader()

    for row in all_results:
        dw.writerow(row)
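Since outkeys is assembled at run time, a result row with a key that is not in the field list would make writerow() raise a ValueError. A sketch of the same constructor call with extrasaction='ignore' (restval already defaults to '' for fields missing from a row):

# Variant of the DictWriter call above: silently drop any keys that are not
# listed in outkeys instead of raising a ValueError.
dw = DictWriter(outfile, fieldnames=outkeys, delimiter='\t',
                extrasaction='ignore')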
Example #35
            log.update(tarefa)
            list_tarefa_logs.append(log)

    return list_tarefa_logs


if __name__ == '__main__':
    print("Exporting tarefas to CSV...")
    with open(TAREFAS_DIR + '/../csv/data_exported.csv', 'wb') as fout:
        csv_writer = DictWriter(
            fout,
            fieldnames=[
                'id_tarefa', 'num_tarefa', 'titulo_tarefa', 'tipo_tarefa',
                'data_cadastro_tarefa', 'sistema_tarefa', 'data_inicio_tarefa',
                'subsistema_tarefa', 'data_deadline_tarefa',
                'aberta_por_tarefa', 'localizacao_analista_tarefa',
                'situacao_tarefa', 'horas_trabalhadas_tarefa',
                'gerente_relacionamento_tarefa', 'num_prioridade_tarefa',
                'andamento_tarefa', 'prioridade_tarefa', 'dados_build_log',
                'data_cadastro_log', 'atividade_log', 'situacao_log',
                'andamento_log', 'horas_trabalhadas_log', 'aberto_por_log',
                'revisao_svn_log'
            ])
        csv_writer.writeheader()
        for tarefa_filename in os.listdir(TAREFAS_DIR):
            id_tarefa = re.findall(r'(\d+)', tarefa_filename)[0]
            tarefa_filepath = TAREFAS_DIR + '/' + tarefa_filename
            with io.open(tarefa_filepath, 'r', encoding='utf-8') as fin:
                list_tarefa_logs = __to_list_tarefa_logs(fin.read())
                for tarefa_log in list_tarefa_logs:
                    tarefa_log.update({'id_tarefa': id_tarefa})
                    csv_writer.writerow(tarefa_log)
Example #36
def convert_pickled_pages_to_csv_dataset(in_file, out_pages_fpath, out_transitions_fpath, delete_in_file = True):
    pages_columns = set()
    transitions_columns = set()
    
    with open(in_file, 'r') as inf:
        while True:
            try:
                page = pickle.load(inf)
                pages_columns.update(page.as_dict().viewkeys())
                for trans in page.transitions:
                    transitions_columns.update(trans.as_dict({ "FROM_LABEL__%s" % l : 1
                                                              for l in page.labels }).viewkeys())
            except EOFError:
                break

    with open(out_pages_fpath, 'w') as pages_f, \
        open(out_transitions_fpath, 'w') as trans_f:
        pages_writer = DictWriter(pages_f,
                                  sorted(pages_columns),
                                  encoding = 'utf8')
        pages_writer.writeheader()
        trans_writer = DictWriter(trans_f,
                                  sorted(transitions_columns),
                                  encoding = 'utf8')
        trans_writer.writeheader()
        with open(in_file, 'r') as inf:
            while True:
                try:
                    page = pickle.load(inf)
                    pages_writer.writerow(page.as_dict())
                    for trans in page.transitions:
                        trans_writer.writerow(trans.as_dict({ "FROM_LABEL__%s" % l : 1
                                                             for l in page.labels }))
                except EOFError:
                    break

    if delete_in_file:
        os.remove(in_file)
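The input file is deserialized twice above, once to collect column names and once to write the rows. A small generator in the same style would keep the EOFError loop in one place; a sketch, reusing the example's pickle import:

def iter_pickled(path):
    # Yield every pickled object in the file until EOF, mirroring the
    # try/except EOFError loops above.
    with open(path, 'r') as inf:
        while True:
            try:
                yield pickle.load(inf)
            except EOFError:
                return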
Example #37
            print(
                u"Too much companies returned for record '%s'" %
                company["company_name"]
            )
            return True

    return {
        "id": company_db.id,
        "code": company_db.edrpou,
        "name_uk": company_db.name_uk,
        "name_en": company_db.name_en,
    }


fp = open("beneficiary.csv", "w")
w = DictWriter(fp, fieldnames=["company_name", "legalForm", "country", "en_name", "location", "en_address", "phone", "address", "mail", "company_code", "owner"])
w.writeheader()

for d in Declaration.objects.filter(nacp_declaration=True, confirmed="a").nocache().iterator():
    data = d.source["nacp_orig"]
    if isinstance(data.get("step_9"), dict):
        for cash_rec in data["step_9"].values():
            if not isinstance(cash_rec, dict):
                continue

            if (cash_rec.get("country", "1") or "1") == "1":
                continue

            rec_to_export = {
                "company_name": cash_rec.get("name"),
                "legalForm": cash_rec.get("legalForm"),
Example #38
        itemtype,
        'title':
        g.value(book, DC.title),
        'date':
        g.value(book, DC.date) or '',  # not all have dates
        'tags':
        ', '.join(tags),
        '# tags':
        len(tags),
        '# tags ending in Y':
        len([t for t in tags if t.endswith('Y')])
    })

items = sorted(items, key=lambda k: k['# tags ending in Y'], reverse=True)

# generate csv file name based on input file
filebase, ext = os.path.splitext(os.path.basename(args.filename))
csv_filename = '%s.csv' % filebase

with open(csv_filename, 'w') as csvfile:
    # write byte-order-mark for utf-8
    csvfile.write(codecs.BOM_UTF8)
    fieldnames = [
        'identifier', 'type', 'title', 'date', '# tags', '# tags ending in Y',
        'tags'
    ]
    writer = DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for item in items:
        writer.writerow(item)
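On Python 3 the manual BOM write can be replaced by opening the file with the utf-8-sig codec, which emits the byte order mark itself; a minimal sketch reusing the same fieldnames and items:

import csv

# Python 3 sketch: utf-8-sig writes the BOM so consumers such as Excel
# detect the encoding.
with open(csv_filename, 'w', newline='', encoding='utf-8-sig') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(items)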
Example #39
from unicodecsv import DictWriter
from core.models import Company2Country

countries_list = [  # the UK, its overseas territories and crown dependencies, in Ukrainian
    "Велика Британія", "Британські Віргінські острови", "Гібралтар",
    "Бермудські острови", "Кайманові острови", "Гернсі", "Джерсі", "Острів Мен"
]

with open("/tmp/uk_companies.csv", "w") as fp:
    w = DictWriter(fp,
                   fieldnames=[
                       "country", "company_name", "company_code",
                       "company_url", "founders", "managers", "rest",
                       "sanctions"
                   ])
    w.writeheader()
    for c2c in Company2Country.objects.filter(
            to_country__name_uk__in=countries_list).select_related(
                "to_country"):
        related = c2c.from_company.all_related_persons

        def joiner(persons):
            return u"\n".join([
                u"{}, https://pep.org.ua{}".format(p.full_name,
                                                   p.get_absolute_url())
                for p in persons
            ])

        w.writerow({
            "country":
            c2c.to_country.name_uk,
Example #40
from django.db import models
from unicodecsv import DictWriter
from core.models import Company

fieldnames = ["id", "url", "name", "head_is_pep"]

with open("/tmp/orphaned_companies.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=fieldnames)
    w.writeheader()
    for c in Company.objects.annotate(cnt=models.Count(
            "from_persons__from_person",
            distinct=True,
            filter=models.Q(from_persons__from_person__is_pep=True))).filter(
                cnt=0):
        w.writerow({
            "id": c.id,
            "url": "https://pep.org.ua/" + c.get_absolute_url(),
            "name": c.name_uk,
            "head_is_pep": c.state_company
        })
Example #41
from core.models import Person2Company, Company2Company
from unicodecsv import DictWriter
from django.utils.translation import activate
from django.conf import settings
from collections import Counter
from tqdm import tqdm

activate(settings.LANGUAGE_CODE)

with open("/tmp/positions.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=["person", "relation", "company", "url"])

    w.writeheader()

    for p2c in tqdm(Person2Company.objects.all().select_related(
            "from_person", "to_company").nocache().iterator()):
        w.writerow({
            "person":
            p2c.from_person.full_name,
            "relation":
            p2c.relationship_type,
            "company":
            p2c.to_company.name,
            "url":
            "https://pep.org.ua{}".format(p2c.from_person.get_absolute_url()),
        })

with open("/tmp/relations.csv", "w") as fp:
    w = DictWriter(fp,
                   fieldnames=[
                       "company1", "relation", "back_relation", "company2",
Example #42
def dump_csv(table, name):
    with open(name, 'w') as fh:
        writer = DictWriter(fh, fieldnames=table.columns)
        writer.writeheader()
        for row in table:
            writer.writerow(row)
Example #43
        if not isinstance(s2, dict):
            continue
        if s2.get("previous_firstname") or s2.get(
                "previous_lastname") or s2.get("previous_middlename"):
            changes.append({
                "person": d.person_id,
                "first_name": s2.get("firstname", ""),
                "patronymic": s2.get("middlename", ""),
                "last_name": s2.get("lastname", ""),
                "prev_first_name": s2.get("previous_firstname", ""),
                "prev_patronymic": s2.get("previous_middlename", ""),
                "prev_last_name": s2.get("previous_lastname", ""),
            })

    if step_1.get("previous_firstname") or step_1.get(
            "previous_lastname") or step_1.get("previous_middlename"):
        changes.append({
            "person": d.person_id,
            "first_name": d.first_name,
            "patronymic": d.patronymic,
            "last_name": d.last_name,
            "prev_first_name": step_1.get("previous_firstname", ""),
            "prev_patronymic": step_1.get("previous_middlename", ""),
            "prev_last_name": step_1.get("previous_lastname", ""),
        })

with open("/tmp/changed_names.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=changes[0].keys())
    w.writeheader()
    w.writerows(changes)