Example #1
def generateManifest(syn, allFiles, filename):
    """Generates a manifest file based on a list of entities objects.

    :param allFiles:   A list of File Entities

    :param filename: file where manifest will be written
    """
    keys = ['path', 'parent', 'name', 'synapseStore', 'contentType', 'used',
            'executed', 'activityName', 'activityDescription']
    annotKeys = set()
    data = []
    for entity in allFiles:
        row = {'parent': entity['parentId'], 'path': entity.path, 'name': entity.name,
               'synapseStore': entity.synapseStore, 'contentType': entity['contentType']}
        row.update({key: val[0] for key, val in entity.annotations.items()})
        annotKeys.update(set(entity.annotations.keys()))
        try:
            prov = syn.getProvenance(entity)
            row['used'] = ';'.join(prov._getUsedStringList())
            row['executed'] = ';'.join(prov._getExecutedStringList())
            row['activityName'] = prov.get('name', '')
            row['activityDescription'] = prov.get('description', '')
        except SynapseHTTPError:
            pass # No provenance present
        data.append(row)
    keys.extend(annotKeys)

    with open(filename, 'w') as fp:
        csvWriter = csv.DictWriter(fp, keys, restval='', extrasaction='ignore', delimiter='\t')
        csvWriter.writeheader()
        for row in data:
            csvWriter.writerow(row)
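The pattern above generalizes: fixed columns first, dynamically discovered annotation keys appended, with restval='' filling gaps and extrasaction='ignore' dropping strays. A self-contained sketch of just that mechanism (the rows and file name are invented for illustration):

import csv

rows = [
    {'path': '/a.txt', 'parent': 'syn1', 'species': 'human'},
    {'path': '/b.txt', 'parent': 'syn2', 'assay': 'rnaseq'},
]

keys = ['path', 'parent']
annot_keys = set()
for row in rows:
    annot_keys.update(k for k in row if k not in keys)
keys.extend(sorted(annot_keys))

with open('manifest.tsv', 'w', newline='') as fp:
    writer = csv.DictWriter(fp, keys, restval='', extrasaction='ignore', delimiter='\t')
    writer.writeheader()
    writer.writerows(rows)  # missing annotation keys become ''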
Example #2
    def write_to_csv(self):
        if self.num_results > 0:
            self.num_results = sum(1 for line in codecs.open(self.tmp_file, mode='r', encoding='utf-8'))
            if self.num_results > 0:
                output_file = codecs.open(self.opts.output_file, mode='a', encoding='utf-8')
                csv_writer = csv.DictWriter(output_file, fieldnames=self.csv_headers)
                csv_writer.writeheader()
                timer = 0
                widgets = ['Write to csv ',
                           progressbar.Bar(left='[', marker='#', right=']'),
                           progressbar.FormatLabel(' [%(value)i/%(max)i] ['),
                           progressbar.Percentage(),
                           progressbar.FormatLabel('] [%(elapsed)s] ['),
                           progressbar.ETA(), '] [',
                           progressbar.FileTransferSpeed(unit='lines'), ']'
                           ]
                bar = progressbar.ProgressBar(widgets=widgets, maxval=self.num_results).start()

                for line in codecs.open(self.tmp_file, mode='r', encoding='utf-8'):
                    timer += 1
                    bar.update(timer)
                    csv_writer.writerow(json.loads(line))
                output_file.close()
                bar.finish()
            else:
                print('There are no docs with the selected field(s): {}.'.format(','.join(self.opts.fields)))
            os.remove(self.tmp_file)
Example #3
    def getoutput(self):
        output = csv.StringIO()
        writer = csv.DictWriter(output, self.fieldnames,
                                extrasaction='ignore',
                                dialect=self.dialect)
        writer.writeheader()
        for ce in self.units:
            writer.writerow(ce.todict())
        return output.getvalue()
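Note that csv.StringIO in this and the later serialize() examples appears to come from a project-local wrapper around the csv module; with the standard library alone, the same in-memory pattern would use io.StringIO, roughly:

import csv
import io

def to_csv_string(fieldnames, dicts):
    # Write to an in-memory text buffer and return its contents.
    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames, extrasaction='ignore')
    writer.writeheader()
    writer.writerows(dicts)
    return output.getvalue()

print(to_csv_string(['a', 'b'], [{'a': 1, 'b': 2, 'c': 3}]))  # 'c' is dropped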
Example #4
    def export(self):
        '''
        Export all data into a CSV file. Will overwrite an existing file.
        '''
        with open(self.__csv_location, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, self.__csv_fields)
            writer.writeheader()
            for day in self.__db.all():
                writer.writerow(day)
Example #5
    def create_manifest(self):
        self.create_remote_path()

        total_folders = len(self._folders)

        folder_name_padding = len(str(total_folders))
        if folder_name_padding < 2:
            folder_name_padding = 2

        folder_num = 0

        filename = 'manifest.tsv'

        keys = ['path', 'parent', 'name', 'forceVersion'] + \
            list(FileMetadataWorker.DICOM_ANNOTATION_FIELDS.keys())

        with io.open(filename, 'w', encoding='utf8') as fp:
            csvWriter = csv.DictWriter(fp,
                                       keys,
                                       restval='',
                                       extrasaction='ignore',
                                       delimiter=u'\t')
            csvWriter.writeheader()

            for files in self._folders:
                folder_num += 1
                folder_path = (self._remote_path or '')

                if total_folders > 1:
                    folder_name = str(folder_num).zfill(folder_name_padding)
                    folder_path = os.path.join(folder_path, folder_name)
                    self.create_folder_in_synapse(folder_path)

                for file_info in files:
                    file_name = file_info["calculated_name"]
                    file_full_local_path = file_info['full_path']

                    full_synapse_path, synapse_parent, _ = self.to_synapse_path(
                        os.path.join(folder_path, file_name))
                    logging.info('{0} -> {1}'.format(file_full_local_path,
                                                     full_synapse_path))

                    row = {
                        "path": file_full_local_path,
                        "parent": synapse_parent.id,
                        "forceVersion": True,
                        "name": file_name
                    }
                    for field_name in FileMetadataWorker.DICOM_ANNOTATION_FIELDS:
                        row[field_name] = file_info.get('annotations', {}).get(field_name)

                    csvWriter.writerow(row)

        logging.info('Manifest written to: {0}'.format(filename))
Example #6
def _write_manifest_data(filename, keys, data):
    with io.open(filename, 'w', encoding='utf8') as fp:
        csvWriter = csv.DictWriter(fp,
                                   keys,
                                   restval='',
                                   extrasaction='ignore',
                                   delimiter='\t')
        csvWriter.writeheader()
        for row in data:
            csvWriter.writerow(row)
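For reference, a hypothetical call to the helper above (assumes io and csv are imported; the keys and rows are invented):

keys = ['path', 'parent', 'name']
data = [
    {'path': '/data/x.bin', 'parent': 'syn123', 'name': 'x.bin'},
    {'path': '/data/y.bin', 'parent': 'syn123'},  # 'name' falls back to restval ''
]
_write_manifest_data('manifest.tsv', keys, data)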
Example #7
    def test_write_simple_dict(self):
        with TemporaryFile("w+", newline='') as fileobj:
            writer = csv.DictWriter(fileobj, fieldnames=["f1", "f2", "f3"])
            writer.writeheader()
            fileobj.seek(0)
            self.assertEqual(fileobj.readline(), "f1,f2,f3\r\n")
            writer.writerow({"f1": 10, "f3": "abc"})
            fileobj.seek(0)
            fileobj.readline()  # header
            self.assertEqual(fileobj.read(), "10,,abc\r\n")
Example #8
    def test_write_fields_not_in_fieldnames(self):
        with TemporaryFile("w+", newline='') as fileobj:
            writer = csv.DictWriter(fileobj, fieldnames=["f1", "f2", "f3"])
            # Of special note is the non-string key (issue 19449)
            with self.assertRaises(ValueError) as cx:
                writer.writerow({"f4": 10, "f2": "spam", 1: "abc"})
            exception = str(cx.exception)
            self.assertIn("fieldnames", exception)
            self.assertIn("'f4'", exception)
            self.assertNotIn("'f2'", exception)
            self.assertIn("1", exception)
Example #9
def writeDictionaryToCSV(votesByPost, fileName):
    with io.open(fileName, 'w', encoding="utf-8") as csv_file:
        fieldNames = HEADER_CSV_KEYS_ORDER
        writer = csv.DictWriter(csv_file, fieldnames=fieldNames)
        writer.writeheader()
        # Mapping of POSTNAME -> [VOTES]
        for postName, votesArr in votesByPost.items():
            print("writing", votesArr)
            for vote in votesArr:
                writer.writerow(vote)
Example #10
    def serialize(self, out):
        output = csv.StringIO()
        writer = csv.DictWriter(output, FIELDNAMES, dialect="catkeys")
        # No real headers, the first line contains metadata
        writer.writerow(dict(zip(
            FIELDNAMES,
            [self.header._header_dict[key] for key in FIELDNAMES_HEADER])))
        for unit in self.units:
            writer.writerow(unit.dict)
        out.write(output.getvalue().encode(self.encoding))
Example #11
    def serialize(self, out):
        # Check first if there is at least one translated unit
        translated_units = [u for u in self.units if u.istranslated()]
        if not translated_units:
            return

        output = csv.StringIO()
        writer = csv.DictWriter(output,
                                fieldnames=OMEGAT_FIELDNAMES,
                                dialect="omegat")
        for unit in translated_units:
            writer.writerow(unit.dict)
        out.write(output.getvalue().encode(self.encoding))
Example #12
    def serialize(self, out):
        # Check first if there is at least one translated unit
        translated_units = [u for u in self.units if u.istranslated()]
        if not translated_units:
            return

        output = csv.StringIO()
        writer = csv.DictWriter(output, fieldnames=self._fieldnames, dialect="utx")
        for unit in translated_units:
            writer.writerow(unit.dict)

        result = output.getvalue().encode(self.encoding)
        out.write(self._write_header().encode(self.encoding))
        out.write(result)
Example #13
    def serialize(self, out):
        # Check first if there is at least one translated unit
        translated_units = [u for u in self.units if u.istranslated()]
        if not translated_units:
            return

        output = csv.StringIO()
        writer = csv.DictWriter(output, fieldnames=WF_FIELDNAMES, dialect="wordfast")
        # No real headers, the first line contains metadata
        self.header.tucount = len(translated_units)
        writer.writerow(dict(zip(WF_FIELDNAMES,
                                 [self.header.header[key] for key in WF_FIELDNAMES_HEADER])))

        for unit in translated_units:
            writer.writerow(unit.dict)
        out.write(output.getvalue().encode(self.encoding))
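Examples #10 and #13 share a trick: skip writeheader() and write the first row with writerow(dict(zip(...))) so the header line carries file-level metadata instead of column names. Reduced to its core (the field names and values are invented):

import csv
import io

FIELDNAMES = ['source', 'target', 'date']
header_values = ['my-project', 'v2', '2020-01-01']

output = io.StringIO()
writer = csv.DictWriter(output, fieldnames=FIELDNAMES)
writer.writerow(dict(zip(FIELDNAMES, header_values)))  # metadata, not headers
writer.writerow({'source': 'hello', 'target': 'hallo', 'date': ''})
print(output.getvalue())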
Example #14
    def test_write_multiple_dict_rows(self):
        fileobj = StringIO()
        writer = csv.DictWriter(fileobj, fieldnames=["f1", "f2", "f3"])
        writer.writeheader()
        self.assertEqual(fileobj.getvalue(), "f1,f2,f3\r\n")
        writer.writerows([{"f1": 1, "f2": "abc", "f3": "f"},
                          {"f1": 2, "f2": 5, "f3": "xyz"}])
        self.assertEqual(fileobj.getvalue(),
                         "f1,f2,f3\r\n1,abc,f\r\n2,5,xyz\r\n")
Example #15
    def write_to_csv(self):
        if self.num_results > 0:
            self.num_results = sum(1 for line in codecs.open(self.tmp_file, mode='r', encoding='utf-8'))
            if self.num_results > 0:
                output_file = codecs.open(self.output_file, mode='a', encoding='utf-8')
                csv_writer = csv.DictWriter(output_file, fieldnames=self.csv_headers)
                csv_writer.writeheader()
                timer = 0

                for line in codecs.open(self.tmp_file, mode='r', encoding='utf-8'):
                    timer += 1
                    csv_writer.writerow(json.loads(line))
                output_file.close()
            else:
                print('There are no docs with the selected field(s): {}.'.format(','.join(self.opts['fields'])))
            os.remove(self.tmp_file)
Example #16
    def write_to_csv(self):
        csv.register_dialect(name=u"export",
                             delimiter=self.opts.delimiter.decode('utf-8'),
                             quotechar=self.opts.quotechar.decode('utf-8'),
                             doublequote=True,
                             skipinitialspace=False,
                             lineterminator=u'\r\n',
                             quoting=csv.QUOTE_ALL)

        if self.num_results > 0:
            self.num_results = sum(1 for line in codecs.open(
                self.tmp_file, mode='r', encoding='utf-8'))
            if self.num_results > 0:
                output_file = codecs.open(self.opts.output_file,
                                          mode='a',
                                          encoding='utf-8')
                csv_writer = csv.DictWriter(output_file,
                                            fieldnames=self.csv_headers,
                                            dialect="export")
                csv_writer.writeheader()
                timer = 0
                widgets = [
                    'Write to csv ',
                    progressbar.Bar(left='[', marker='#', right=']'),
                    progressbar.FormatLabel(' [%(value)i/%(max)i] ['),
                    progressbar.Percentage(),
                    progressbar.FormatLabel('] [%(elapsed)s] ['),
                    progressbar.ETA(), '] [',
                    progressbar.FileTransferSpeed(unit='lines'), ']'
                ]
                bar = progressbar.ProgressBar(widgets=widgets,
                                              maxval=self.num_results).start()

                for line in codecs.open(self.tmp_file,
                                        mode='r',
                                        encoding='utf-8'):
                    timer += 1
                    bar.update(timer)
                    csv_writer.writerow(json.loads(line))
                output_file.close()
                bar.finish()
            else:
                print('There are no docs with the selected field(s): {}.'.format(
                    ','.join(self.opts.fields)))
            os.remove(self.tmp_file)
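The register_dialect call above bundles the delimiter and quoting options under a reusable name. A condensed sketch of the same technique, with illustrative settings:

import csv
import io

csv.register_dialect('export',
                     delimiter=';',
                     quotechar='"',
                     doublequote=True,
                     lineterminator='\r\n',
                     quoting=csv.QUOTE_ALL)

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=['id', 'name'], dialect='export')
writer.writeheader()
writer.writerow({'id': 1, 'name': 'a;b'})
print(buf.getvalue())  # every field quoted; ';' inside values survives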
Example #17
def anonymize_file(source,
                   dest,
                   csvheaderformatdict=None,
                   ignorementions=False):
    print('Reading from [{0}] and writing anonymized data to [{1}]...'.format(
        source, dest))
    with io.open(source, 'r', encoding='utf8') as f:
        with io.open(dest, 'w', encoding='utf8') as o:
            reader = csv.DictReader(f)
            if not csvheaderformatdict:
                fieldnames = consts.defaultHeader
            else:
                fieldnames = reader.fieldnames
            writer = csv.DictWriter(o, fieldnames)
            writer.writeheader()
            for row in reader:
                anonymize_row(row, fieldnames, csvheaderformatdict,
                              ignorementions)
                writer.writerow(row)
Example #18
def parse_all_apis():
    apis = load_unparsed_api_metadata()
    total_count = len(apis)
    a = "{} movie metadata were loaded!".format(total_count)
    print(a)
    with open("api_data.csv", "w") as f:
        header_was_written = False
        for i, api in enumerate(apis):
            #b = "Processing {} of {}: {}".format(i + 1, total_count, api['api_name'])
            #print(b)
            parsed_api = parse_one_api_metadata(api)
            w = csv.DictWriter(f, parsed_api.keys())
            if not header_was_written:
                w.writeheader()
                header_was_written = True

            try:
                w.writerow(parsed_api)
            except UnicodeEncodeError:
                print(parsed_api)
Example #19
def people_to_qualtrics_csv(hub, repo_tools_data, frequency, update):
    """
    Print out a formatted file as expected by Qualtrics import.
    """

    if update is not None:
        with open(update, newline='', encoding='utf-8') as update_data:
            reader = csv.DictReader(update_data)
            initial = {row[EMAIL]: row for row in reader}
            fields = [field for field in reader.fieldnames if field]
    else:
        initial = {}
        fields = [NAME, EMAIL, WEEK, ASSOCIATED_WITH, UNSUBSCRIBED]

    csv_writer = csv.DictWriter(click.get_text_stream('stdout'),
                                fieldnames=fields,
                                extrasaction='ignore')
    csv_writer.writeheader()
    for username, person in repo_tools_data.people.items():
        if person.email is None:
            continue

        hashdigest = hashlib.md5(person.email.lower().encode('utf-8')).hexdigest()

        row = initial.get(person.email, {})
        row.update({
            NAME: person.name,
            EMAIL: person.email,
            WEEK: int(hashdigest, 16) % frequency + 1,
            ASSOCIATED_WITH: 'edX' if person.associated_with('edX', 'ArbiSoft') else 'other',
        })

        if not person.email_ok:
            row[UNSUBSCRIBED] = 'true'

        csv_writer.writerow(row)
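The WEEK computation is a stable hash bucket: hashing the lowercased email and taking the digest mod frequency assigns each person a fixed week in 1..frequency. In isolation (names invented):

import hashlib

def week_for(email, frequency):
    # Same input always lands in the same bucket, 1..frequency.
    digest = hashlib.md5(email.lower().encode('utf-8')).hexdigest()
    return int(digest, 16) % frequency + 1

print(week_for('Someone@Example.com', 4))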
Example #20
    async def on_ready(self):
        print('Logged on to discord as {0}!'.format(self.user))
        print('Attempting to query discord channel ID {0}...'.format(
            self.exportChannelID))
        channel = self.get_channel(int(self.exportChannelID))
        if channel:
            with io.open(self.exportCSV, 'w', encoding='utf8') as o:
                print('Exporting anonymized data to [{0}]...'.format(
                    self.exportCSV))
                fieldnames = consts.defaultHeader
                writer = csv.DictWriter(o, fieldnames)
                writer.writeheader()
                print('Parsing channel history...')
                async for message in channel.history(
                        limit=self.exportChannelLimit,
                        after=self.exportChannelAfter):
                    # TODO: Do something with attachments and reactions
                    row = {
                        'AuthorID': message.author.id,
                        'Author': format(message.author),
                        'Date': message.created_at,
                        'Content': message.content,
                        'Attachments': len(message.attachments),
                        'Reactions': len(message.reactions)
                    }

                    # Replace all mentions with name#discriminator format
                    # TODO: Optimize this iteration, also combining it with the anonymization (callback?)
                    for user in message.mentions:
                        row['Content'] = row['Content'].replace(
                            '<@!{0}>'.format(user.id),
                            '@{0}#{1}'.format(user.name, user.discriminator))

                    anonymize_row(row, fieldnames)

                    writer.writerow(row)
        # Using await should wait until any previous async finishes
        print('Disconnecting from discord')
        await self.logout()
Example #21
def JLwriteLinkedResults(clustered_pairs,
                         input_1,
                         input_2,
                         output_file,
                         inner_join=False):
    if not inner_join:
        raise Exception("Only expected to be used with inner_join.")

    logging.info('saving unique results to: %s' % output_file)

    writer = csv.DictWriter(output_file,
                            fieldnames=['roil', 'outside', 'confidence'])
    writer.writeheader()
    def get_num(x):
        return x.split('|')[1]

    for record in clustered_pairs:
        pair, confidence = record
        outside = get_num(pair[0])
        roil = get_num(pair[1])
        writer.writerow({
            'roil': roil,
            'outside': outside,
            'confidence': str(confidence),
        })
Example #22
    def render(self, data, media_type=None, renderer_context=None):
        response = renderer_context.get('response', None)

        if response is not None and response.exception:
            return None

            # TODO: Replace return None with commented lines below to get human-readable description in response
            #
            # Returning a non-None value in this function, as part of the exception handling code path in Django REST
            # Framework, generates an HTTP response that Chrome rejects as invalid (ERR_INVALID_RESPONSE).  Possible
            # Django REST Framework bug?
            #
            # fieldnames = data.keys()
            # rows = [data]
        else:
            fieldnames = data['fieldnames']
            rows = data['rowdicts']

        buff = io.StringIO()
        writer = csv.DictWriter(buff, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

        return buff.getvalue().encode(self.charset)
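Stripped of the Django REST Framework plumbing, the render step is: build the CSV in a text buffer, then encode it to bytes for the response. A minimal stand-alone version (the function name and charset are illustrative):

import csv
import io

def render_csv(fieldnames, rowdicts, charset='utf-8'):
    buff = io.StringIO()
    writer = csv.DictWriter(buff, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rowdicts)
    return buff.getvalue().encode(charset)

body = render_csv(['id', 'name'], [{'id': 1, 'name': 'x'}])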
Example #23
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=8)
    parser.add_argument("input")
    parser.add_argument("output")
    args = parser.parse_args()
    countdown = args.batchsize
    with io.open(args.input, 'r', encoding='utf-8') as input_file:
        books_reader = csv.DictReader(input_file)
        with io.open(args.output, 'w', encoding='utf-8') as output_file:
            books_writer = csv.DictWriter(output_file, fieldnames)
            books_writer.writeheader()
            for row in books_reader:
                if countdown > 0 and not row.get('webchecked', None):
                    isbn = str(row.get('ISBN') or '')
                    if len(isbn) == 9:
                        isbn = "0" + isbn
                    if isbn:
                        countdown = countdown - 1
                        new_isbn = isbnlib.to_isbn13(isbnlib.canonical(isbn))
                        if new_isbn is None or new_isbn == "":
                            print "Could not canonicalize isbn", isbn
                        else:
                            row['ISBN'] = new_isbn
                        details = None
                        try:
                            details = isbnlib.meta(isbn)
                        except isbnlib.dev._exceptions.NoDataForSelectorError:
                            print "No data for ISBN", isbn, "title", row.get('Title', "Unknown")
                            row['webchecked'] = "No data for ISBN"
                        except isbnlib._exceptions.NotValidISBNError:
                            print "Invalid ISBN", isbn, "for", row['Title']
                            row['webchecked'] = "Invalid ISBN"
                        except isbnlib.dev._exceptions.ISBNNotConsistentError:
                            print "Inconsistent data for",  row['Title']
                            row['webchecked'] = "Inconsistent ISBN data"
                        if details:
                            if details.get('ISBN-13', "") != "" and row.get('ISBN', "") == "":
                                row['ISBN'] = details['ISBN-13']
                            if 'Authors' in row:
                                row['Authors'] = row['Authors'].split('/')
                            old_title = row['Title']
                            web_title = details['Title']
                            if old_title != web_title:
                                old_canon = canonicalize_title(old_title)
                                web_canon = canonicalize_title(web_title)
                                old_len = len(old_canon)
                                web_len = len(web_canon)
                                if ((web_len > old_len and old_canon in web_canon)
                                    or (web_len == old_len and old_canon == web_canon)):
                                    print "Title improvement from", old_title, "to", web_title
                                else:
                                    print "Title discrepancy:", old_title, "in file,", web_title, "found online"
                                    details['Title'] = old_title
                            # don't use 'update', because we don't want to drag in random other fields that dictwriter will then object to
                            for key in fieldnames:
                                if key in details:
                                    row[key] = details[key]
                            if 'Authors' in row:
                                row['Authors'] = '/'.join(row['Authors'])
                            row['webchecked'] = "OK"
                # from https://docs.python.org/2/library/csv.html
                encoded_row = {k: (v.encode("utf-8") if isinstance(v, basestring) else v)
                               for k,v in row.iteritems()}
                books_writer.writerow(row)
Example #24
def test_create_and_update_file_view():

    ## Create a folder
    folder = Folder(str(uuid.uuid4()),
                    parent=project,
                    description='creating a file-view')
    folder = syn.store(folder)

    ## Create dummy file with annotations in our folder
    path = utils.make_bogus_data_file()
    file_annotations = dict(fileFormat='jpg',
                            dataType='image',
                            artist='Banksy',
                            medium='print',
                            title='Girl With Balloon')
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, annotations=file_annotations)
    a_file = syn.store(a_file)
    schedule_for_cleanup(a_file)

    # Add new columns for the annotations on this file and get their IDs
    my_added_cols = [
        syn.store(synapseclient.Column(name=k, columnType="STRING"))
        for k in file_annotations.keys()
    ]
    my_added_cols_ids = [c['id'] for c in my_added_cols]
    view_default_ids = [
        c['id'] for c in syn._get_default_entity_view_columns('file')
    ]
    col_ids = my_added_cols_ids + view_default_ids
    scopeIds = [folder['id'].lstrip('syn')]

    ## Create an empty entity-view with defined scope as folder

    entity_view = EntityViewSchema(name=str(uuid.uuid4()),
                                   scopeIds=scopeIds,
                                   addDefaultViewColumns=True,
                                   addAnnotationColumns=False,
                                   type='file',
                                   columns=my_added_cols,
                                   parent=project)

    entity_view = syn.store(entity_view)
    schedule_for_cleanup(entity_view)

    assert_equals(set(scopeIds), set(entity_view.scopeIds))
    assert_equals(set(col_ids), set(entity_view.columnIds))
    assert_equals('file', entity_view.type)

    ## get the current view-schema
    view = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(view.filepath)

    view_dict = list(
        csv.DictReader(io.open(view.filepath, encoding="utf-8", newline='')))

    # check that all of the annotations were retrieved from the view
    assert set(file_annotations.keys()).issubset(set(view_dict[0].keys()))

    updated_a_file = syn.get(a_file.id, downloadFile=False)

    # Check that the values are the same as what was set
    # Both in the view and on the entity itself
    for k, v in file_annotations.items():
        assert_equals(view_dict[0][k], v)
        assert_equals(updated_a_file.annotations[k][0], v)

    # Make a change to the view and store
    view_dict[0]['fileFormat'] = 'PNG'

    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as temp:
        schedule_for_cleanup(temp.name)
        temp_filename = temp.name

    with io.open(temp_filename, mode='w', encoding="utf-8",
                 newline='') as temp_file:
        dw = csv.DictWriter(temp_file,
                            fieldnames=view_dict[0].keys(),
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        dw.writeheader()
        dw.writerows(view_dict)
        temp_file.flush()
    new_view = syn.store(synapseclient.Table(entity_view.id, temp_filename))
    new_view_dict = list(
        csv.DictReader(io.open(temp_filename, encoding="utf-8", newline='')))
    assert_equals(new_view_dict[0]['fileFormat'], 'PNG')

    #query for the change
    start_time = time.time()

    new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(new_view_results.filepath)
    new_view_dict = list(
        csv.DictReader(
            io.open(new_view_results.filepath, encoding="utf-8", newline='')))
    #query until change is seen.
    while new_view_dict[0]['fileFormat'] != 'PNG':
        #check timeout
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        #query again
        new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
        new_view_dict = list(
            csv.DictReader(
                io.open(new_view_results.filepath,
                        encoding="utf-8",
                        newline='')))
    #paranoid check
    assert_equals(new_view_dict[0]['fileFormat'], 'PNG')
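A side note on the quoting=csv.QUOTE_NONNUMERIC choice above: it quotes every field that is not a number, which suits round-tripping DictReader output since those values are all strings. A tiny illustration:

import csv
import io

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=['n', 's'], quoting=csv.QUOTE_NONNUMERIC)
writer.writeheader()
writer.writerow({'n': 3.14, 's': 'pi'})
print(buf.getvalue())  # header and 'pi' quoted; 3.14 written bare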
Example #25
    def write_header(self, header_columns):
        self.internal_writer = csv.DictWriter(self.stdout,
                                              fieldnames=header_columns,
                                              quoting=csv.QUOTE_ALL)
        self.internal_writer.writeheader()