def main():
    writer = UnicodeWriter(sys.stdout)
    writer.writerow(["gemeente", "stembureau", "postcode", "stemmen"])
    for file_path in get_file_paths():
        rows = parse_eml_file(file_path)
        writer.writerows(rows)
    return 0
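# All of the snippets in this collection depend on a project-specific UnicodeWriter
# helper (imported elsewhere), and its constructor signature clearly varies between
# projects: the second argument is variously a dialect, a delimiter, or a list of
# field names. As a point of reference only, here is a minimal sketch in the style
# of the Python 2 ``csv`` module documentation recipe, assuming UTF-8 output; it is
# not the implementation any particular snippet above or below was written against.
import codecs
import csv
import cStringIO


class UnicodeWriter(object):
    """Write rows of unicode strings to the byte stream ``f`` in the given encoding."""

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # Buffer each row as UTF-8 bytes, then re-encode it into the target encoding.
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        self.writer.writerow([s.encode("utf-8") for s in row])
        data = self.queue.getvalue().decode("utf-8")
        self.stream.write(self.encoder.encode(data))
        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)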
def dataset(query_arguments, type_output='csv', delimiter='|', output=stdout, lang=None):
    if set(['main', 'union', 'optional', 'filter']) <= set(query_arguments):
        query_arguments = format_query(query=query_arguments, lang=lang)
        query = factory(main=query_arguments['main'],
                        union=query_arguments['union'],
                        optional=query_arguments['optional'],
                        filter=query_arguments['filter'])
        results = retrieve_result(query)

        properties_set = set()
        formatted_result = defaultdict(lambda: defaultdict(
            lambda: defaultdict(lambda: defaultdict(list))))

        for result in results:
            quantity_of_interest = result['quantity_of_interest']['value']
            property_uri = result['property']['value']
            property_label = result['property_label']['value'] if 'property_label' in result else ''
            hasValue_uri = result['hasValue']['value']
            hasValue_label = result['hasValue_label']['value'] if 'hasValue_label' in result else ''
            properties_set.add((property_uri, property_label))
            formatted_result[quantity_of_interest][property_uri]['hasValue']['hasValue_label'].append(hasValue_label)
            formatted_result[quantity_of_interest][property_uri]['hasValue']['hasValue_uri'].append(hasValue_uri)

        keys = list(properties_set)
        if type_output == 'csv':
            out = UnicodeWriter(output, ';')
            out.writerow(filter(None, chain.from_iterable(keys)))
            for qoi in formatted_result:
                cells = [[delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_uri']),
                          delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_label'])]
                         if key[1] else
                         [delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_uri'])]
                         for key in keys]
                out.writerow(chain.from_iterable(cells))
        return formatted_result
def convert_opml_to_csv(args):
    """Convert OPML file to Todoist CSV."""
    tree = ET.parse(args.file)
    opml = tree.getroot()
    body = opml.find('body')

    with codecs.open(target_name(args.file, 'csv'), 'w+') as target:
        writer = UnicodeWriter(target, FIELDNAMES)
        writer.writerow(FIELDNAMES)

        def make_row(type='', content='', indent=''):
            return [type, content, '', indent, '', '', '', '']

        def process_element(outline, level=1):
            # content
            row = make_row(TYPE_TASK, outline.get('text'), str(level))
            writer.writerow(row)
            # note
            note = outline.get(NOTE_ATTRIB)
            if note:
                row = make_row(TYPE_NOTE, note)
                writer.writerow(row)
            # separator
            writer.writerow(make_row())
            for subelement in outline.findall('outline'):
                process_element(subelement, level + 1)

        for outline in body:
            process_element(outline)
def write_json_to_csv(json_list, filename):
    from unicode_csv import UnicodeWriter

    translate = {
        "class_nbr": "course_num",
        "crn": "course_num",
        "num": "course_num",
        "number": "department_num",
        "dist": "distribution",
        "div": "division",
        "lim": "course_cap",
        "graded_sem": "seminar",
        "graded_seminar": "seminar",
        "double_graded_seminar": "seminar",
        "dept_name": "department",
    }
    scrap = {"req", "sign", "note", "prerequisite", "prereq"}

    # Get all the available headers.
    header_set = {header for obj in json_list for header in obj.keys()}
    headers = [h for h in header_set if not (h in translate or h in scrap)]
    headers.extend(translate.values())
    headers = map(unicode, headers)
    headers = list(set(headers))
    headers.sort()

    with open(filename, "w") as f:
        # Prepare the csv.
        writer = UnicodeWriter(f)

        # Write "cleaned" headers to the CSV.
        cleaned_headers = [unicode(h.replace(" ", "_").lower()) for h in headers]
        writer.writerow(cleaned_headers)

        for obj in json_list:
            for key in translate.keys():
                if key in obj:
                    new_key = translate[key]
                    obj[new_key] = obj[key]
            vals = []
            for header in headers:
                val = obj.get(header, "")
                if type(val) == list:
                    val = map(str, val)
                vals.append(unicode(val))
            writer.writerow(vals)
def main():
    reader = UnicodeReader(sys.stdin)
    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        "gemeente", "stembureau", "postcode", "stemmen",
        "postcode_google", "lat", "lng"
    ])
    for row in reader:
        result = find_voting_place(row)
        writer.writerow(result)
        sleep(1)
    return 0
def main():
    shapes = get_shapes(sys.argv[1])
    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        'buurt_code', 'buurt_naam', 'wijk_code', 'gem_code', 'gem_naam'])
    for geom, props in shapes:
        out_row = []
        for fld in [
            u'BU_CODE', u'BU_NAAM', u'WK_CODE', u'GM_CODE', u'GM_NAAM'
        ]:
            out_row.append(props[fld])
        writer.writerow(out_row)
    return 0
def write_group_by_one_filed(todos, output_path, value_name, values_list):
    csv_file = open(output_path, 'wb')
    res = group_by_value(todos, value_name, values_list)
    print('write: %s' % output_path)
    writer = UnicodeWriter(csv_file)
    row = ['complete']
    row.extend(values_list)
    writer.writerow(row)
    for row in res:
        output_row = [row[0]]
        for _, v in row[1].items():
            output_row.append(str(v))
        writer.writerow(output_row)
class SqlExportFileWriter(object):
    """Writes rows to a CSV file, optionally filtering on a predicate."""

    def __init__(self, dest, predicate=None, use_unicode=False):
        if use_unicode:
            self._writer = UnicodeWriter(dest, delimiter=DELIMITER)
        else:
            self._writer = csv.writer(dest, delimiter=DELIMITER)
        self._predicate = predicate

    def write_header(self, keys):
        self._writer.writerow(keys)

    def write_rows(self, results):
        if self._predicate:
            results = [result for result in results if self._predicate(result)]
        if results:
            self._writer.writerows(results)
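# A hedged usage sketch for SqlExportFileWriter, assuming DELIMITER is a module-level
# constant (e.g. u',') and that rows are lists of unicode strings. The predicate,
# file name and row data below are made up for illustration only.
def keep_exported(row):
    # Hypothetical predicate: drop rows whose last column carries the marker u'SKIP'.
    return row[-1] != u'SKIP'

with open('export.csv', 'wb') as dest:
    writer = SqlExportFileWriter(dest, predicate=keep_exported, use_unicode=True)
    writer.write_header([u'id', u'name', u'status'])
    writer.write_rows([
        [u'1', u'alpha', u'OK'],
        [u'2', u'beta', u'SKIP'],  # filtered out by the predicate
    ])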
def main():
    if len(sys.argv) < 6:
        print >> sys.stderr, "Usage: merge.py <shape_file> <lat_field> <lon_field> <lat_fallback> <lon_fallback>"
        return 1
    reader = UnicodeReader(sys.stdin)
    writer = UnicodeWriter(sys.stdout)
    header = reader.next()
    shapes = get_shapes(sys.argv[1])

    out_header = deepcopy(header)
    out_header += [
        'buurt_code', 'buurt_naam', 'wijk_code', 'wijk_naam', 'gem_code', 'gem_naam'
    ]
    writer.writerow(out_header)

    lat_field = sys.argv[2]
    lon_field = sys.argv[3]
    lat_fb_field = sys.argv[4]
    lon_fb_field = sys.argv[5]

    for row in reader:
        out_row = deepcopy(row)
        data = dict(zip(header, row))
        if (data[lon_field] != u'-') and (data[lat_field] != u''):
            lat = data[lat_field]
            lon = data[lon_field]
        else:
            lat = data[lat_fb_field]
            lon = data[lon_fb_field]
        if (lat != u'-') and (lon != u'-'):
            point = shapely.geometry.Point(float(lat), float(lon))
            for shape, props in shapes:
                if shape.contains(point):
                    # Append neighbourhood, district and municipality codes/names,
                    # matching the extra columns added to out_header above.
                    for fld in [
                        u'BU_CODE', u'BU_NAAM', u'WK_CODE', u'WK_NAAM',
                        u'GM_CODE', u'GM_NAAM'
                    ]:
                        out_row.append(props[fld])
                    break
        if len(out_row) == len(row):  # if we did not find anything
            out_row += [u'-', u'-', u'-', u'-', u'-', u'-']
        writer.writerow(out_row)
    return 0
def write_to_csv(file_name):
    documents = get_docs()
    print repr(documents.count())
    serialized_documents = json.loads(dumps(documents))
    csv_file = open(file_name, 'w')
    csv_writer = UnicodeWriter(csv_file, dialect='excel')
    count = 0
    for doc in serialized_documents:
        print repr(doc)
        del doc['_id']
        if count == 0:
            header = doc.keys()
            # header.sort()
            csv_writer.writerow(header)
        count = count + 1
        csv_writer.writerow(doc.values())
    csv_file.close()
def write_timer(todos, output_path):
    csv_file = open(output_path, 'wb')
    date_timer = {}
    print('write: %s' % output_path)
    writer = UnicodeWriter(csv_file)
    for todo in todos:
        if not isinstance(todo.timer, int):
            continue
        if todo.completed not in date_timer:
            date_timer[todo.completed] = 0
        date_timer[todo.completed] += todo.timer
    row = ['date', 'timer']
    writer.writerow(row)
    for date, minutes in date_timer.items():
        output_row = [
            date.strftime('%Y-%m-%d'),
            "{0:.2f}".format(minutes / 60.0)
        ]
        writer.writerow(output_row)
def all_csv():
    pseudofile = StringIO()
    spamwriter = UnicodeWriter(pseudofile, encoding='utf-8')  # , delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    header = ','.join([
        'company.name',
        'company.cvr',
        'context.year',
        'grossprofitloss',
    ])
    for company in all_companies:
        for context in company.contexts:
            if 'grossprofitloss' in context.fields:
                spamwriter.writerow([
                    company.name,
                    company.cvr,
                    context.year,
                    context.fields['grossprofitloss'],
                ])
    return Response(header + '\n' + pseudofile.getvalue(), mimetype='text/csv')
def main():
    if len(sys.argv) < 3:
        print >> sys.stderr, "Usage: merge.py <file1> <file2>"
        return 1
    places = get_places(sys.argv[2])
    election_file = UnicodeReader(open(sys.argv[1]))
    headers = election_file.next()
    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        "gemeente", "stembureau", "postcode", "stemmen",
        "postcode_google", "lat", "lng",
        "stembureau2017", "lat2017", "lon2017"
    ])
    for row in election_file:
        result = dict(zip(headers, row))
        place = None
        if result[u'postcode'] != u'-':
            place = find_place_by_postcode(
                places, re.sub(r'\s+', u'', result[u'postcode']))
        elif result[u'postcode_google'] != u'':
            place = find_place_by_postcode(
                places, re.sub(r'\s+', u'', result[u'postcode_google']))
        if place is None:
            place = find_place_by_muni_and_name(places, result[u'gemeente'],
                                                result[u'stembureau'])
        result_row = deepcopy(row)
        if place is not None:
            # Append in the same order as the header: stembureau2017, lat2017, lon2017.
            result_row.append(place[u'stembureau'])
            result_row.append(place[u'Latitude'])
            result_row.append(place[u'Longitude'])
        else:
            result_row.append(u'-')
            result_row.append(u'-')
            result_row.append(u'-')
        # if result_row[-1] != u'-':
        #     pprint(result_row)
        writer.writerow(result_row)
    return 0
def generate_csv(params, sort_by):
    class_dict = get_class_dict()
    primary_column_name = get_primary_column_name()
    all_classes = get_all_classes()

    keypairs = {}
    for param in params:
        name = param['name']
        class_type = class_dict[name]
        value = class_type.get_search_value(param)
        if name == primary_column_name:
            name = '_id'
        if value:
            keypairs.update({name: value})

    items = accounts_collection.find(keypairs)
    if sort_by:
        items = items.sort(sort_by)

    with TemporaryFile() as f:
        # csv_writer = csv.writer(f)
        csv_writer = UnicodeWriter(f)
        csv_writer.writerow(all_classes)
        for item in items:
            csv_columns = []
            primary_key = item['_id']
            for name in all_classes:
                class_type = class_dict[name]
                if name == primary_column_name:
                    name = '_id'
                if name in item:
                    csv_string = class_type.get_csv_string(item[name])
                else:
                    csv_string = default_csv_string
                csv_columns.append(csv_string)
            csv_writer.writerow(csv_columns)
        f.seek(0)
        lines = f.read()
    return lines
def marcanalyse(files, sample_length=5):
    """
    Returns a csv of marc keys and analysed values, showing, for example,
    how many records exist.

    ================= ==============================================================
    Column            Description
    ================= ==============================================================
    ``tag``           The 3-digit MARC tag.
    ``subfield``      The single-character subfield.
    ``tag_meaning``   The English meaning of the tag/subfield, if known.
    ``record_count``  The number of records that have at least one of these tags.
    ``min_valency``   The minimum number of this tag or subfield that each record has.
    ``max_valency``   The maximum number of this tag or subfield that each record has.
    ``samples``       Non-repeating sample values of the values of each tag or subfield.
    ================= ==============================================================
    """
    analysis = multifile_iter_records(files, sample_length=sample_length)
    csv_header = ("tag", "subfield", "tag_meaning", "record_count",
                  "min_valency", "max_valency", "samples")
    writer = UnicodeWriter(sys.stdout)
    writer.writerow(csv_header)

    listanalysis = [x for x in analysis.iteritems()]
    listanalysis.sort()

    for key, value in listanalysis:
        v = []
        v.append(u'"%s"' % key)                   # tag
        v.append(u"")                             # subfield
        v.append(meaning(key))                    # tag_meaning
        v.append(unicode(value['count']))         # record_count
        v.append(unicode(value['min_valency']))
        v.append(unicode(value['max_valency']))
        v.append(u"\r\r".join(value['samples']))
        writer.writerow(v)

        subfield_analysis = [x for x in value['subfields'].iteritems()]
        subfield_analysis.sort()
        for subfield, subvalue in subfield_analysis:
            v = []
            v.append(u"")                             # tag
            v.append(subfield)                        # subfield
            v.append(meaning(key, subfield))          # tag_meaning
            v.append(unicode(subvalue['count']))      # record_count
            v.append(unicode(subvalue['min_valency']))
            v.append(unicode(subvalue['max_valency']))
            v.append(u"\r\r".join(subvalue['samples']))
            writer.writerow(v)
class OpmlToCsvConverter(OpmlConverter):
    """Convert OPML file to Todoist CSV."""

    EXT = 'csv'

    def __init__(self, args):
        super(OpmlToCsvConverter, self).__init__(args)

    def convert(self):
        document_body = self._prepare_document()
        with codecs.open(self.target_name, 'w+') as target:
            self.writer = UnicodeWriter(target, FIELDNAMES)
            self._write_header_row()
            for outline in document_body:
                self.process_element(outline)

    def _prepare_document(self):
        tree = ET.parse(self.source_name)
        opml = tree.getroot()
        return opml.find('body')

    def _write_header_row(self):
        self.writer.writerow(FIELDNAMES)

    def _make_row(self, type='', content='', indent=''):
        return [type, content, '', indent, '', '', '', '', '']

    def process_element(self, outline, level=1):
        # content
        row = self._make_row(self.TYPE_TASK, outline.get('text'), str(level))
        self.writer.writerow(row)
        # note
        note = outline.get(self.NOTE_ATTRIB)
        if note:
            row = self._make_row(self.TYPE_NOTE, note)
            self.writer.writerow(row)
        # separator
        self.writer.writerow(self._make_row())
        for subelement in outline.findall('outline'):
            self.process_element(subelement, level + 1)
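# A hedged wiring example for OpmlToCsvConverter. It assumes the OpmlConverter base
# class (not shown here) takes the parsed command-line arguments and derives
# source_name, target_name, TYPE_TASK, TYPE_NOTE and NOTE_ATTRIB from them or from
# module constants; the argument parser below is hypothetical.
import argparse

parser = argparse.ArgumentParser(description='Convert an OPML outline to Todoist CSV.')
parser.add_argument('file', help='path to the source .opml file')
args = parser.parse_args()

OpmlToCsvConverter(args).convert()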
    else:
        minute = playtime.text.split()[1]
        percent = playtime.text.split()[-1]
        # playername = WebDriverWait(ff, 60).until(
        #     EC.presence_of_element_located((
        #         By.CLASS_NAME, "fn"
        #     ))
        # )
        # playtime = WebDriverWait(ff, 60).until(
        #     EC.presence_of_element_located((
        #         By.XPATH, "//td[ b[text() = 'Minutes:'] ]"
        #     ))
        # )
    finally:
        wr.writerow([link, name, minute, percent])
        result.append([link, name, minute, percent])
    ff.close()
    print " ".join(("done", str(links.index(link))))
    time.sleep(random.randint(3, 7))

result_file.close()
pickle.dump(result, open("result_file2", "w"))
# /html/body/form/section/article/div/table/tbody/tr/td/table[3]/tbody/tr[2]/td/span/table[2]/tbody/tr[12]/td/table/tbody/tr/td/table/tbody/tr[4]/td
    if page:
        wikipedia[page] = {'viaf': viaf_code, 'sbn': sbn_code}
        if viaf_code is not None:
            viaf2wiki[viaf_code] = page
        if sbn_code is not None:
            sbn2wiki[sbn_code] = page

    page = page or ''
    viaf_code = viaf_code or ''
    sbn_code = sbn_code or ''
    wikiwriter.writerow([page, viaf_code, sbn_code])
    time.sleep(0.5)

outwikifile.close()

viaf_sbn_codes = set(sbn2viaf.keys())
sbn_codes = set(sbn.keys())
viaf_sbn_intersection = viaf_sbn_codes.intersection(sbn_codes)

print 'no. of VIAF-ICCU SBN records: ', len(viaf2sbn)
print 'no. of ICCU SBN records with description: ', len(sbn)
print 'no. of elements in the intersection:', len(viaf_sbn_intersection)

viaf_code = None
sbn_code = None
rdr = UnicodeReader(open(sys.argv[1]))
tag_re = re.compile(r'\W', re.UNICODE)

flickr_tags = set()
tag_source = os.path.join(os.path.dirname(__file__),
                          '../data/tags_from_flickr.csv')
with open(tag_source) as f:
    tagreader = UnicodeReader(f)
    for tagrow in tagreader:
        text, raw, author = tagrow[:]
        flickr_tags.add(text)

outfile = open('updated_tags.orig.csv', 'w+')
wrtr = UnicodeWriter(outfile)
head = ['Normalized', 'Raw', 'Model', 'field']
wrtr.writerow(head)

for row in rdr:
    new_row = []
    ugly, raw, mitch, laura, model, field, basis = row[:]
    u = 'MISSING'
    if ugly:
        u = ugly if ugly in flickr_tags else ('CURR:NO_MATCH:%s' % ugly)
    else:
        ugly = unicodedata.normalize('NFC', raw)
        ugly = tag_re.sub('', ugly.strip().lower())
        u = ugly if ugly in flickr_tags else ('NEW:NO_MATCH:%s' % ugly)
    new_row = row[:]
    new_row[0] = u
    wrtr.writerow(new_row)
import json
import os.path
import sys
import io

from unicode_csv import UnicodeWriter


def get_value_from_dict_or_return_NA(dict, key):
    if key in dict and dict[key] is not None:
        return dict[key]
    return "NA"


with io.open('acl_instagram.csv', 'ab') as csv_data:
    writer = UnicodeWriter(csv_data, delimiter='`')
    with open('acl_instagram.txt') as instagram_file:
        for insta_data in instagram_file:
            post = json.loads(insta_data)
            writer.writerow([
                get_value_from_dict_or_return_NA(post, 'user'),
                get_value_from_dict_or_return_NA(post, 'post_time'),
                get_value_from_dict_or_return_NA(post, 'post_location'),
                get_value_from_dict_or_return_NA(post, 'likes_count'),
                get_value_from_dict_or_return_NA(post, 'views_count'),
                json.dumps(get_value_from_dict_or_return_NA(post, 'comments'))
            ])