Beispiel #1
0
def test(cursor):
	"""Report source values that have no matching slug in the sources table.

	Collects the distinct ``source`` rows of ``flows`` and ``exchange_rates``
	and the ``slug`` rows of ``sources``, prints how many rows each query
	returned, and writes the rows missing from ``sources`` to
	``../out_data/missing_flow_source_list.csv`` and
	``../out_data/missing_ex_source_list.csv``.

	cursor -- database cursor; it must be iterable over the result rows
	          after ``execute`` has been called.
	"""
	cursor.execute("""SELECT distinct(source) FROM flows""")
	set_flow = set(cursor)
	print("distinct source in flow %d" % len(set_flow))

	cursor.execute("""SELECT distinct(source) FROM exchange_rates""")
	set_ex = set(cursor)
	print("distinct source in exchange_rates %d" % len(set_ex))

	cursor.execute("""SELECT slug FROM sources""")
	set_source = set(cursor)
	print("nb elem in source %d" % len(set_source))

	# Rows are compared as whole result rows, so a source is "missing" when
	# no row of the sources query equals it.
	reports = (
		("missing_flow_source_list", set_flow - set_source),
		("missing_ex_source_list", set_ex - set_source),
	)
	for report_name, missing_rows in reports:
		# The with-block closes each output file once its rows are written;
		# previously both handles were left open.
		with open(os.path.join("../out_data", report_name + ".csv"), "wb") as out_file:
			UnicodeWriter(out_file).writerows(missing_rows)
Beispiel #2
0
                next_date = dates[i_date + 1]
                if current_date == next_date - 1:
                    pass
                else:
                    periods[-1] = "%s-%s" % (
                        periods[-1], current_date
                    ) if periods[-1] != current_date else str(current_date)
                    periods.append(next_date)

            else:
                # fin 2 : fin de la liste
                periods[-1] = "%s-%s" % (
                    periods[-1], current_date
                ) if periods[-1] != current_date else str(current_date)

        row[2] = ",".join(periods)
        if row[1] == None:
            row[1] = "champs vide"

    return table


# Export the per-source reporting periods to CSV. The with-block makes sure
# the output file is closed (and therefore flushed) when the export is done.
with open(os.path.join("out_data", 'report_by_sources_and_period.csv'), "wb") as report_file:
    writer = UnicodeWriter(report_file)
    # Header row: column names taken from the cursor's last query.
    writer.writerow([description[0] for description in c.description])

    data = dateByReportingBySource(c.fetchall())
    for d in data:
        # Echo the third column of every row to stdout.
        print(d[2])
    writer.writerows(data)
Beispiel #3
0
# Announce the end of the cleaning step between two separator lines and
# commit the pending changes on the connection.
print("-------------------------------------------------------------------------")

print("cleaning done")
conn.commit()
# Printed only after commit() has returned.
print("commited")
print("-------------------------------------------------------------------------")

################################################################################
##			Export all tables in csv files
################################################################################

# Names of the tables to dump; each one is written to out_data/<name>.csv.
tables = [
	"sources",
	"entity_names",
	"RICentities",
	"exchange_rates",
	"currencies",
	"expimp_spegen",
	"RICentities_groups",
	"flows",
]

for item in tables:
	# Table names come only from the hard-coded list above, so building the
	# statement by concatenation is safe here.
	c.execute("select * from " + item)
	# The with-block closes each CSV file before the next table is exported;
	# previously one file handle per table was left open.
	with open(os.path.join("out_data", item + ".csv"), "wb") as table_file:
		writer = UnicodeWriter(table_file)
		# Header row: column names reported by the cursor for this query.
		writer.writerow([description[0] for description in c.description])
		# Stream the result rows straight from the cursor.
		writer.writerows(c)
	print("export " + item + ".csv done")
	print("-------------------------------------------------------------------------")
Beispiel #4
0
def _distinct_rows(cursor, query):
	"""Execute *query* and return its result rows as a set."""
	cursor.execute(query)
	return set(cursor)


def _write_missing_sources(log_name, missing_rows):
	"""Write the first column of each row, one per line, to a UTF-8 log file."""
	log_path = os.path.join("../out_data/logs", log_name + ".csv")
	with codecs.open(log_path, "wb", "utf8") as f:
		for s in missing_rows:
			# A NULL source is written as an empty line.
			f.write((s[0] if s[0] is not None else u"") + u"\n")


def _export_rows_for_sources(cursor, query, missing_rows, log_name):
	"""Write every distinct row returned by *query* for each missing source."""
	seen = set()
	unique_rows = []
	for source_row in missing_rows:
		cursor.execute(query, [source_row[0]])
		for record in cursor:
			# Keep the first occurrence of each row, preserving order; the
			# set lookup replaces a linear scan of the rows collected so far.
			key = tuple(record)
			if key not in seen:
				seen.add(key)
				unique_rows.append(list(record))

	# The with-block closes the file once all rows are written.
	with open(os.path.join("../out_data/logs", log_name + ".csv"), "wb") as out_file:
		UnicodeWriter(out_file).writerows(unique_rows)


def test(cursor):
	"""Log the sources used by flows / exchange_rates but absent from sources.

	Prints the number of distinct rows found by each query, then writes four
	files under ``../out_data/logs``:

	* ``missing_flow_source_list.csv`` / ``missing_ex_source_list.csv`` --
	  the missing source values, one per line;
	* ``missing_flow_source_list_id.csv`` / ``missing_ex_source_list_id.csv``
	  -- the full, de-duplicated rows of ``flows`` / ``exchange_rates`` that
	  reference a missing source.

	cursor -- database cursor; it must be iterable over the result rows
	          after ``execute`` and accept ``?`` placeholders.
	"""
	#
	# Get distinct source in flows, exchange_rates and sources
	#
	set_flow = _distinct_rows(cursor, """SELECT distinct(source) FROM flows""")
	print("distinct source in flow %d" % len(set_flow))

	set_ex = _distinct_rows(cursor, """SELECT distinct(source) FROM exchange_rates""")
	print("distinct source in exchange_rates %d" % len(set_ex))

	set_source = _distinct_rows(cursor, """SELECT distinct(slug) FROM sources""")
	print("nb elem in source %d" % len(set_source))

	#
	# output missing source in flows
	#
	missing_flow_source_list = set_flow - set_source
	print("flow sources missin in source table %d" % len(missing_flow_source_list))
	_write_missing_sources("missing_flow_source_list", missing_flow_source_list)

	#
	# output missing source in exchange_rates
	#
	missing_ex_source_list = set_ex - set_source
	print(missing_ex_source_list)
	print("Exchange rate missing in source table %d" % len(missing_ex_source_list))
	_write_missing_sources("missing_ex_source_list", missing_ex_source_list)

	#
	# output missing source with id in flows
	#
	_export_rows_for_sources(
		cursor,
		"""SELECT * FROM flows where source=?""",
		missing_flow_source_list,
		"missing_flow_source_list_id",
	)

	#
	# output missing source with id in exchange_rates
	#
	_export_rows_for_sources(
		cursor,
		"""SELECT * FROM exchange_rates where source=?""",
		missing_ex_source_list,
		"missing_ex_source_list_id",
	)
		for i_date,current_date in enumerate(dates):

			if i_date<len(dates)-1:
				next_date=dates[i_date+1]
				if current_date==next_date-1:
					pass
				else:
					periods[-1]="%s-%s"%(periods[-1],current_date) if periods[-1]!=current_date else str(current_date)
					periods.append(next_date)
					
			else:
				# fin 2 : fin de la liste
				periods[-1]="%s-%s"%(periods[-1],current_date) if periods[-1]!=current_date else str(current_date)


		row[2] =  ",".join(periods)
		if row[1] == None:
			row[1] = "champs vide"

	return table

# Export the per-source reporting periods to CSV. The with-block makes sure
# the output file is closed (and therefore flushed) when the export is done.
with open(os.path.join("out_data", 'report_by_sources_and_period.csv'), "wb") as report_file:
	writer = UnicodeWriter(report_file)
	# Header row: column names taken from the cursor's last query.
	writer.writerow([description[0] for description in c.description])

	data = dateByReportingBySource(c.fetchall())
	for d in data:
		# Echo the third column of every row to stdout.
		print(d[2])
	writer.writerows(data)

Beispiel #6
0
            current_source = row[0]
            next_source = table[i_source+1][0]
        if (current_source == next_source):
            newCSV.append(row);
        else:
        # csvTitle = unicode(current_source, 'utf-8')
            print newSource
            nameStats.append([current_source, len(newCSV)])
            csvTitle = unicodedata.normalize('NFD', current_source).encode('ascii', 'ignore')
            csvTitle = csvTitle.replace(" ", "_")
            if len(csvTitle) > 255:
                csvTitle = csvTitle[:200]
            try:
                writer = UnicodeWriter(open(os.path.join("./out_data/sources", csvTitle +'.csv'), "w"))
                writer.writerow([description[0] for description in c.description])
                writer.writerows(newCSV)
                newCSV = []
            except IOError as e:
                print "I/O error({0}): {1}".format(e.errno, e.strerror)
                elem = csvTitle.encode('utf8')
                errors.append(elem)
                pass


# Write every collected entry of `errors` on its own line. The with-block
# closes the file before completion is reported; previously the handle was
# left open.
with open('./out_data/errors/errorsNameFormat.txt', 'w') as errorsNameFormat:
    for item in errors:
        print>>errorsNameFormat, item
print("errorsNameFormat.txt done")

sourceNameErrors = open('./out_data/errors/sourceNameErrors.txt', 'w')
for item in set(nameProblem):