def save_csv(f, tvseries):
    """Write the highest ranking TV-series to the open file ``f`` as CSV.

    Args:
        f: Writable file object handed to ``UnicodeWriter``.
        tvseries: Iterable of rows; each row is written unchanged after
            the header row.
    """
    out = UnicodeWriter(f)

    # The first row is the table caption.
    out.writerow(['Title', 'Ranking', 'Genre', 'Actors', 'Runtime'])

    # Then one row per scraped series.
    for series in tvseries:
        out.writerow(series)
def createCSV(url):
    """Download a JSON list of samples and write one CSV file per sample.

    Every object whose ``value`` has more than two entries produces a file
    named after its timestamp (``obj['timestamp']`` is divided by 1e3
    before being passed to ``datetime.fromtimestamp``), holding one
    ``x, y, z`` row per space-separated component.

    Args:
        url: Address that returns the JSON document.

    Raises:
        KeyError: If an accepted object lacks ``x``, ``y``, ``z`` or
            ``timestamp``.
        IndexError: If ``y`` or ``z`` has fewer components than ``x``.
    """
    from contextlib import closing

    # urllib2 responses are not context managers; closing() releases the
    # connection even when read() fails.
    with closing(urllib2.urlopen(url)) as response:
        content = response.read()

    for obj in json.loads(content):
        if len(obj['value']) <= 2:
            continue

        # Parse the payload before creating the file so that a malformed
        # object does not leave an empty CSV behind.
        x = obj['value']['x'].split(' ')
        y = obj['value']['y'].split(' ')
        z = obj['value']['z'].split(' ')
        timestamp = datetime.datetime.fromtimestamp(obj['timestamp'] / 1e3)

        with open(str(timestamp) + '.csv', 'wb') as csvfile:
            csvWriter = UnicodeWriter(csvfile)
            for i in xrange(len(x)):
                csvWriter.writerow([x[i], y[i], z[i]])
def expCSV(self, save=False):
    """Write the three columns of ``self.model`` to a ';'-delimited CSV.

    Args:
        save: When truthy the table name is taken from the combo box
            (with '.csv' removed); otherwise the user is asked for it and
            nothing is written if the dialog is not confirmed.
    """
    if save:
        name = self.dlg.tabComboBox.currentText().replace('.csv', '')
    else:
        nameInput = QInputDialog.getText(QWidget(), u"Название таблицы", u"Введите название таблицы")
        if not nameInput[1]:
            return
        name = nameInput[0]

    target = self.plugin_dir + '/tab/' + name + '.csv'
    with open(target, 'wb') as fout:
        writer = UnicodeWriter(fout, delimiter=';')
        for r in range(self.model.rowCount()):
            # Columns 0, 1 and 2 are read in that order.
            cells = [self.model.item(r, column).text() for column in (0, 1, 2)]
            writer.writerow(cells)
# Constructor (whitespace-collapsed in this view): builds areaLookup,
# regionLookup and countyLookup from the first sheet of their workbooks, then
# walks the rows of a sheet (row 0 is skipped), resolves
# "<placeName>-<countyName>" through areaLookup.getParentByName and writes
# [placeId, oopName] to 'obce.csv'; the row details are printed when the
# lookup returns -1.
# NOTE(review): with the indentation lost it cannot be confirmed from here
# whether the 'SPH_OBEC.xls' open_workbook line is live code and whether
# writerow runs for every row or only when oopName == -1 - check the original.
def __init__( self ) : #1) generate area lookup with xlrd.open_workbook( 'OOP_dislokace.xlsx', encoding_override="cp1251" ) as wb: sh = wb.sheet_by_index( 0 ) self.areaLookup = AreaLookup( sh ); with xlrd.open_workbook( 'kraje_ciselnik-stary.xls' ) as wb: sh = wb.sheet_by_index( 0 ) self.regionLookup = RegionLookup( sh ); with xlrd.open_workbook( 'okresy_ciselnik.xls' ) as wb: sh = wb.sheet_by_index( 0 ) self.countyLookup = CountyLookup( sh ); # with xlrd.open_workbook( 'SPH_OBEC.xls' ) as wb: with open('obce.csv', 'wb') as f: sh = wb.sheet_by_index( 0 ) numRows = sh.nrows print wb.encoding writer = UnicodeWriter( f ) for rowIndex in range(numRows): if rowIndex > 0 : placeId = sh.row( rowIndex )[0].value placeName = sh.row( rowIndex )[5].value nuts = sh.row( rowIndex )[1].value #regionName = self.regionLookup.getNameByCode( nuts ) countyName = self.countyLookup.getNameByCode( nuts ) keyName = unicode( placeName ) + "-" + unicode( countyName ) #oopName = self.countyLookup.getNameByCode( nuts ) oopName = self.areaLookup.getParentByName( keyName ) if oopName == -1: #print sh.row( rowIndex ) print unicode( placeName ) + "," + unicode( nuts ) + "," + unicode( keyName ) + "," + unicode( oopName ) writer.writerow( [ str( int(placeId) ), unicode( oopName ) ] )
def removeZeroRowsFromArray(origFile, newFile):
    """Copy CSV rows from ``origFile`` to ``newFile``, dropping all-zero rows.

    Only the columns with index 4 up to, but not including, the last one
    are inspected; a row is kept as soon as one of them is non-zero.

    Args:
        origFile: Iterable of CSV lines accepted by ``csv.reader``.
        newFile: Writable file object handed to ``UnicodeWriter``.

    Raises:
        ValueError: If an inspected cell cannot be converted to float.
    """
    writer = UnicodeWriter(newFile)
    for row in csv.reader(origFile):
        # any() stops at the first non-zero value, like the explicit
        # flag-and-break loop it replaces.
        if any(float(value) != 0 for value in row[4:-1]):
            writer.writerow(row)
# Fragment (the enclosing loop header is outside this view): capitalises the
# first word of the Czech (crime[1]) and English (crime[3]) crime names,
# collects [row[0], czechCrime, englishCrime, row[3]] in finalRows and then
# writes every collected row to 'crimeLookup2.csv', decoding each column from
# UTF-8 first. When crime == -1 the first two columns of the row are printed.
# NOTE(review): englishCrime is not assigned in the visible code when
# crime == -1 - presumably initialised above; verify.
czechCrime = "" if crime != -1 : englishCrime = crime[3] a = englishCrime.split(" ") a[0] = a[0].capitalize() englishCrime = " ".join(a) czechCrime = crime[1] a = czechCrime.split(" ") a[0] = a[0].capitalize() czechCrime = " ".join(a) else: print row[0], row[1] finalRow = [ row[0], czechCrime, englishCrime, row[3] ] finalRows.append( finalRow ) with open( "crimeLookup2.csv", "wb" ) as csvfile: writer = UnicodeWriter( csvfile ) for row in finalRows: rowArray = [] for column in row: #print column rowArray.append( unicode( column.decode( "utf-8" ) ) ) writer.writerow( rowArray )
# Fragment (the enclosing definition starts outside this view): appends
# column 6 of every csvreader row to the global kmls list, builds the global
# areaLookup from the first sheet of 'areaLookup.xls', then reads
# 'pridani-id-do-shp/shape-without-geo.csv' and writes [row[1], kmls[index]]
# to 'pridani-id-do-shp/shape-with-geo.csv' for every row except the first.
# NOTE(review): `code` is looked up for each row but not used in the visible
# code.
global kmls for row in csvreader: kmls.append( row[6] ) #create areaLookup with xlrd.open_workbook( 'areaLookup.xls', encoding_override="utf-8" ) as wb: sh = wb.sheet_by_index( 0 ) global areaLookup areaLookup = AreaLookupXls( sh ); with open('pridani-id-do-shp/shape-without-geo.csv', 'rb') as csvfile: csvreader = csv.reader(csvfile) with open('pridani-id-do-shp/shape-with-geo.csv', 'wb') as f: writer = UnicodeWriter( f ) index = 0 global kmls global areaLookup #for areaName in areaLookup.areas: # print areaName for row in csvreader: name = row[1].lower().strip() code = areaLookup.getCodeByName( name ) if index > 0: writer.writerow( [ unicode(row[1].decode("utf-8") ), unicode( kmls[index]) ] ) index = index + 1
# Fragment (starts and ends mid-loop in this view): appends either the solved
# percentage (solved / found * 100, or 0 when found is not positive) or
# str(diff) to finalRow, collects the rows in finalFile, writes all of them to
# the '...with-zeros.csv' file and then begins a second pass that writes the
# file without zeros; that second loop continues past this view.
if float( found ) > 0: solvedPerc = ( float( solved ) / float( found ) ) * 100 else: solvedPerc = 0 finalRow.append( str( solvedPerc ) ) else: finalRow.append( str( diff ) ) finalFile.append( finalRow ) rowIndex = rowIndex + 1 print "3) writing final file with zeros " with open('../generated/crimeData-2013:08:with-zeros.csv', 'wb') as csvfile: writer = UnicodeWriter(csvfile) for row in finalFile: writer.writerow( row ) print "4) writing final file without zeros " with open('../generated/crimeData-2013:08.csv', 'wb') as csvfile: writer = UnicodeWriter(csvfile) for row in finalFile: #check if there is any value hasValue = False numColumns = len( row )
# Script fragment (cut off inside an open string literal): reads the screen
# names from the 'scientwist' div elements of 'scientwists.htm', opens
# 'scientwists.csv', writes the column-label row and starts looking each user
# up through the twitter API.
# NOTE(review): `page` and `outfile` are opened without `with` and are not
# closed in the visible code; the file is opened in 'w' rather than 'wb' mode.
from UnicodeWriter import UnicodeWriter # fetch list of scientwists' screennames page = open("scientwists.htm") soup = BeautifulSoup(page) scientwists = soup.findAll('div', {"class" : 'scientwist'}) screennames = [sn.a.contents[0].encode('utf-8') for sn in scientwists] #print screennames # fetch rest of info from twitter, and write to spreadsheet api = twitter.Api(username='******', password='******') outfile = open('scientwists.csv', 'w') csvout = UnicodeWriter(outfile, 'excel') # column labels on first row row = [ 'Screen Name', 'Name', 'Location', 'Description', 'Followers'] csvout.writerow(row) for user in screennames: print user try: f = api.GetUser(user) """print 'screenname: ' + `f.GetScreenName()`
class Generator:
    """Build the crime-data CSV for one year/month from Excel workbooks.

    Constructing an instance runs the whole pipeline: it loads
    ``areaLookup.xls`` and ``timeLookup.xls``, optionally exports the lookup
    tables, reads every workbook under ``../files/<year>/<month>/``, folds
    the separately reported sheets listed in step 4 into their districts and
    writes ``../generated/crimeData-<year>:01-<month>[:with-zeros].csv``.
    """

    COLUMN_NAMES = [
        "id",
        "time",
        "area",
        "crime",
        "found",
        "found-end",
        "found-total",
        "solved",
        "solved-perc",
        "solved-additionally",
        "commited-drugged",
        "commited-alcohol",
        "commited-recidivst",
        "commited-under-15",
        "comitted-15-17",
        "comitted-under-18",
        "charged-total",
        "charged-recidivist",
        "charged-under-15",
        "charged-15-17",
        "charged-women",
        "damage-total",
        "damage-found",
    ]

    # Class-level defaults. __init__ rebinds the lookups, the writers and the
    # two district containers on every instance.
    areaLookup = ""
    crimeLookup = ""
    timeLookup = ""
    writer = ""
    areaWriter = ""
    crimeWriter = ""
    timeWriter = ""
    recordId = 1
    files = []
    districtCrimeDataSheetsByCode = {}
    districtCrimeDataSheets = []
    # store areasheets with keys, so we can find them retrospectively to add
    # another areasheet
    areaSheets = {}

    # Switches selecting which outputs __init__ produces.
    generateAreaLookup = False
    generateCrimeLookup = False
    generateTimeLookup = False
    generateCrimeData = True

    def __init__(self, year, month, omitZeroValues, onCompleteCallback):
        """Run the whole export for one year/month.

        Args:
            year: Year to export; compared as a string with the folder
                names found under ``../files``.
            month: Month to export; compared as a string with the folder
                names found under ``../files/<year>``.
            omitZeroValues: Passed to every ``AreaCrimeDataSheet``; when
                falsy the output file name gets a ``:with-zeros`` suffix.
            onCompleteCallback: Called without arguments as the last step
                when truthy.

        Raises:
            KeyError: If one of the district codes listed in step 4 was
                not produced by step 3.
        """
        # null everything
        self.areaLookup = ""
        self.crimeLookup = ""
        self.timeLookup = ""
        self.writer = ""
        self.areaWriter = ""
        self.crimeWriter = ""
        self.timeWriter = ""
        self.recordId = 1
        # Fresh containers per instance: the class-level list/dict would
        # otherwise be shared, so a second run would see the sheets of the
        # first one.
        self.districtCrimeDataSheets = []
        self.districtCrimeDataSheetsByCode = {}

        self.year = year
        self.month = month
        self.omitZeroValues = omitZeroValues
        self.onCompleteCallback = onCompleteCallback

        # 1) a) generate area lookup
        with xlrd.open_workbook("areaLookup.xls") as wb:
            self.areaLookup = AreaLookup(wb.sheet_by_index(0))
        # b) time lookup
        with xlrd.open_workbook("timeLookup.xls") as wb:
            self.timeLookup = TimeLookup(wb.sheet_by_index(0))

        # 2) generate lookups
        # area lookup
        if self.generateAreaLookup:
            with open("../generated/AreaLookup.csv", "wb") as f:
                lookupRows = self.areaLookup.generate()
                self.areaWriter = UnicodeWriter(f)
                for lookupRow in lookupRows:
                    self.areaWriter.writerow(lookupRow)

        # crime lookup - is generated manually
        if self.generateCrimeLookup:
            # The second positional argument of open_workbook is the log
            # file object, not a file mode, so no "wb" here.
            with xlrd.open_workbook("../files/a______.xls") as wb:
                self.crimeLookup = CrimeLookup(wb.sheet_by_index(0))
            with open("../generated/CrimeLookup.csv", "wb") as f:
                # NOTE(review): the rows are generated but nothing is
                # written, so this only truncates the file; the writing
                # code was disabled in the original.
                self.crimeLookup.generate()

        # time lookup
        if self.generateTimeLookup:
            with open("../generated/TimeLookup.csv", "wb") as f:
                lookupRows = self.timeLookup.generate()
                self.timeWriter = UnicodeWriter(f)
                for lookupRow in lookupRows:
                    self.timeWriter.writerow(lookupRow)

        if self.generateCrimeData:
            # 3) walk ../files/<year>/<month>/ and process every workbook
            directory = "../files"
            for yearFolder in listdir(directory):
                # check if is year we're interested in
                if str(yearFolder) != str(self.year):
                    continue
                yearPath = directory + "/" + yearFolder
                for monthFolder in listdir(yearPath):
                    # check if is month we're interested in
                    if str(monthFolder) != str(self.month):
                        continue
                    monthPath = yearPath + "/" + monthFolder
                    workbookNames = listdir(monthPath)
                    # get time period id
                    periodId = self.timeLookup.getTimeIdByYearAndMonth(
                        int(yearFolder), int(monthFolder)
                    )
                    for workbookName in workbookNames:
                        # check only for excel files (".xlsx" also
                        # contains ".xls")
                        if ".xls" not in workbookName:
                            continue
                        # omit files with underscore markers
                        if (
                            "__L" in workbookName
                            or "__R" in workbookName
                            or "__X" in workbookName
                        ):
                            continue
                        districtSheet = self.processFile(
                            monthPath + "/" + workbookName, periodId
                        )
                        if districtSheet:
                            self.districtCrimeDataSheetsByCode[districtSheet.code] = districtSheet

            # Steps 4-6 consume what step 3 collected, so they are only
            # run together with it.
            # 4) add Letiste (airport) sheets to respective districts
            transports = [
                {"from": "x004110", "to": "0011"},
                {"from": "x064160", "to": "0602"},
                {"from": "x074170", "to": "0704"},
                {"from": "x174150", "to": "1706"},
                {"from": "x194130", "to": "1903"},
                # add train stations
                {"from": "x060050", "to": "0602"},
                {"from": "x070050", "to": "0707"},
            ]
            for transport in transports:
                baseDistrictSheet = self.districtCrimeDataSheetsByCode[transport["to"]]
                addingDistrictSheet = self.districtCrimeDataSheetsByCode[transport["from"]]
                baseDistrictSheet.addDistrictCrimeDataSheet(addingDistrictSheet)

            # 5) generate all rows from district crime data
            rows = self.generate()
            # don't need all the objects any more
            self.clear()

            # 6) write to csv file
            fileName = str(self.year) + ":" + "01-" + str(self.month)
            if not self.omitZeroValues:
                fileName = fileName + ":with-zeros"
            with open("../generated/crimeData-" + fileName + ".csv", "wb") as f:
                print("start writing file " + unicode(fileName))
                self.writer = UnicodeWriter(f)
                # The header (COLUMN_NAMES) is intentionally not written.
                for row in rows:
                    self.writer.writerow(row)

        # complete callback
        if self.onCompleteCallback:
            self.onCompleteCallback()

    def addRecordArray(self, sourceArr, arrToAdd):
        """Add the numeric columns of ``arrToAdd`` onto ``sourceArr``.

        The first three columns identify the record and are left alone;
        every later column of ``sourceArr`` is replaced in place by the
        string form of the float sum. An empty ``sourceArr`` yields a
        shallow copy of ``arrToAdd`` instead.

        Returns:
            ``sourceArr`` (mutated), or the copy when it was empty.
        """
        if len(sourceArr) > 0:
            for i in range(3, len(sourceArr)):
                sourceArr[i] = str(float(sourceArr[i]) + float(arrToAdd[i]))
        else:
            sourceArr = copy(arrToAdd)
        return sourceArr

    def processFile(self, fileUrl, timeId):
        """Read one district workbook into a ``DistrictCrimeDataSheet``.

        The district name is read from row 4, column 2 of the first sheet
        and resolved through ``areaLookup``. When that fails, or the first
        sheet is one of the two train-station sheets, the value from row 5
        is appended to make the name more specific. Every sheet of the
        workbook is added to the district as an ``AreaCrimeDataSheet``.

        Args:
            fileUrl: Path of the workbook.
            timeId: Time period id stored with every sheet.

        Returns:
            The district sheet, which is also appended to
            ``self.districtCrimeDataSheets``.
        """
        with xlrd.open_workbook(fileUrl) as wb:
            firstSheet = wb.sheet_by_index(0)
            districtName = firstSheet.row(4)[2].value
            areaName = firstSheet.row(5)[2].value
            districtCode = str(self.areaLookup.getAreaCodeByName(districtName))

            # if Letiste (airport) or train station, need to add next row
            # to make it more specific
            if districtCode == "-1" or firstSheet.name in ("a060050", "a070050"):
                districtName = districtName + "-" + areaName
                districtCode = str(self.areaLookup.getAreaCodeByName(districtName))

            # data records for districts
            districtCrimeDataSheet = DistrictCrimeDataSheet(districtCode, districtName, timeId)
            self.districtCrimeDataSheets.append(districtCrimeDataSheet)

            print(unicode(districtName) + " - " + unicode(districtCode))
            # districtCode is a string here, so the unresolved case has to
            # be compared with "-1" (comparing with the int never matched).
            if districtCode == "-1":
                print(districtCode)
                print(districtName)

            for sheetIndex in range(wb.nsheets):
                sheet = wb.sheet_by_index(sheetIndex)
                areaSheet = AreaCrimeDataSheet(sheet, timeId, self.omitZeroValues)
                districtCrimeDataSheet.addAreaCrimeDataSheet(areaSheet)

                # testing correct naming; the code is taken straight from
                # the name of the sheet
                areaName = areaSheet.name
                areaCode = areaSheet.code
                if areaCode == -1:
                    Logger.throwError("Unknown Area: " + unicode(areaCode) + "," + unicode(areaName))
                print(unicode(areaCode) + "," + unicode(areaName))

            return districtCrimeDataSheet

    def generate(self):
        """Return the concatenated rows of every collected district sheet."""
        rows = []
        for districtSheet in self.districtCrimeDataSheets:
            rows.extend(districtSheet.generate())
        return rows

    def clear(self):
        """Drop all collected district sheets."""
        self.districtCrimeDataSheets = []
        self.districtCrimeDataSheetsByCode.clear()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Convert a file holding one JSON document per line ("multiple root" JSON)
# into a CSV with one row per venue.
import json
import csv
from UnicodeWriter import UnicodeWriter

# Python 2 csv writers need the output file opened in binary mode.
with open('4s_venues.json', 'r') as f, open('output_file.csv', 'wb') as fp:
    writer = UnicodeWriter(fp)
    for line in f:
        if not line.strip():
            # json.loads would raise on a blank line
            continue
        response = json.loads(line)
        venue = response['venue']
        location = venue['location']
        uid = venue['id']
        name = venue['name']
        # NOTE(review): the original indexed 'address' strictly, read the
        # whole location dict as "city" through a misspelled name and wrote
        # an undefined `text`; writing id, name, address and city is the
        # presumed intent - confirm the wanted columns.
        address = location.get('address', u'')
        city = location.get('city', u'')
        writer.writerow([uid, name, address, city])
# Fragment (the enclosing loop header is outside this view): parses
# row[9].value with locale.atof and adds it as population to the community
# named row[1].value in the global communityLookup; any failure in the try
# body is swallowed by a bare except (the source comment mentions a dash in
# the population value). Afterwards communityRecord.generate() is written to
# 'communities-with-pop.csv' for every community in the lookup.
# NOTE(review): numCommunities is assigned but not used in the visible code.
communityName = row[1].value #is population a float value? try: communityPopulation = locale.atof( str( row[9].value ) ) #find corresponding community in lookup and add population global communityLookup communityLookup.addPopulationToCommunityByName( communityName, communityPopulation ) except: pass #there was dash in the population value #3) go through all through lookup print "3) go through all through lookup " with open('communities-with-pop.csv', 'wb') as f: writer = UnicodeWriter( f ) global communityLookup numCommunities = len( communityLookup.communities ) for communityName in communityLookup.communities: communityRecord = communityLookup.getCommunityByName( communityName ) writer.writerow( communityRecord.generate() ) #4) create oop lookup #print "4) create oop lookup" #with open('oop.csv', 'rb') as csvfile: #csvreader = csv.reader(csvfile) #global oopLookup #oopLookup = OopLookup( csvreader )
# Fragment (the enclosing definition starts outside this view): after a
# commented-out lookup step, step 5 opens 'shape-with-zsj.csv', writes a fixed
# header row and then every record returned by the global
# shapeLookup.generate().
#code = int( row[0].value ) #zsjRecord = zsjLookup.getZsjByCode( code ) #if zsjRecord != -1: #shapeLookup.zsjRecord = zsjRecord #oopRecord = oopLookup.getOop( zsjRecord.town, zsjRecord.county ) #if oopRecord: #shapeLookup.oopRecord = oopRecord #print oopRecord #if not oopRecord: # print zsjRecord.town + "," + zsjRecord.zsj + "," + zsjRecord.county + "," + zsjRecord.region #5) with open('shape-with-zsj.csv', 'wb') as f: print "start writing file" writer = UnicodeWriter( f ) global shapeLookup records = shapeLookup.generate() lenRecords = len( records ) #write header writer.writerow( [ "_ID,N,24,5", "_ID,N,24,5", "_GEO,C,192", "_KOD_LAU2,N,10,0", "OBEC_kod", "Obec", "Castobce_dil_kod", "Castobce_dil", "ZSJ_kod", "ZSJ", "Obvykle_bydlici", "Trvale_bydlici", "Kraj_kod", "Kraj", "Okres_kod", "Okres", "ORP_kod", "ORP", "OOP" ] ) for index in range( lenRecords ): writer.writerow( records[ index ] )
# Fragment (the enclosing definition starts outside this view): for every row
# of contactfile whose first column is found in the global areasByCode, the
# row is stored in contacts under area[0]; otherwise a message and the code
# are printed. The export to 'areas3.csv', which title-cases the names and
# restores the upper-case abbreviations, is switched off by normalize = False.
global areasByCode csvReader = csv.reader(contactfile, delimiter=',') for row in csvReader: area = areasByCode.get( row[0], -1 ) if area != -1: contacts[ area[0] ] = row else: print "cound't find contact" print row[0] #part for uppercasing normalizing the names normalize = False if normalize: with open('areas3.csv', 'wb' ) as f: writer = UnicodeWriter( f ) for rowIndex in areas: row = areas[ rowIndex ] id = row[0] name = row[1].decode( "utf-8" ).lower().title() name = name.replace( "Oop", "OOP" ) name = name.replace( "Mop", "MOP" ) name = name.replace( "Úo".decode("utf-8"), "ÚO".decode("utf-8") ) name = name.replace( "Křp".decode("utf-8"), "KŘP".decode("utf-8") ) pop = row[2] contact = row[3] writer.writerow( [ unicode( id ), unicode( name ), unicode( pop ), unicode( contact ) ] )
# Script fragment (the loop body continues past this view): defines the
# pieces of the Amazon affiliate link and image URLs, scrapes the post titles,
# post contents and 'morelink' elements from http://www.21usdeal.com/zh/,
# writes the header row of 'wordpress.csv' and, for every link that contains
# link0, takes the sixth '/'-separated part of the link as the ASIN and
# rebuilds the affiliate link and picture URL from it.
# substrings to construct our Amazon affiliate links and image URLs link0 = "http://www.amazon.com/gp/product/" link1 = "/ref=as_li_ss_tl?ie=UTF8&camp=1789&creative=390957&creativeASIN=" link2 = "&linkCode=as2&tag=buyusabrand-20" pict0 = "http://ws.assoc-amazon.com/widgets/q?_encoding=UTF8&ASIN=" pict1 = "&Format=_SL160_&ID=AsinImage&MarketPlace=US&ServiceVersion=20070822&WS=1&tag=buyusabrand-20" soup = BeautifulSoup(urllib2.urlopen("http://www.21usdeal.com/zh/")) #soup = BeautifulSoup(open("21usdeal_test.html")) titles = soup.findAll(attrs={'class' : "art-postheader"}) descrs = soup.findAll(attrs={'class' : "art-postcontent"}) links = soup.findAll(attrs={'class' : "morelink"}) with open('wordpress.csv', 'wb') as f: writer = UnicodeWriter(f, delimiter=",") writer.writerow(["csv_post_title", "csv_post_post", "csv_post_type", "csv_post_excerpt", "csv_post_categories", "csv_post_tags", "csv_post_date", "desc", "link", "picture"]) for (title, descr, link) in zip(titles, descrs, links): link = link.a.next_sibling.get('href') # only run if this is an Amazon link if link0 in link : title = title.a.get_text(strip=True) descr = descr.get_text(strip=True) # extract the Amazon product ID asin = link.split('/')[5] link = ""+link0+asin+link1+asin+link2 pict = ""+pict0+asin+pict1
# Script fragment (the loop body continues past this view): opens
# 'merchant_category_information.csv', writes the header row and, for the
# category ids 1 to 623, downloads the merchant XML of that category and
# parses it with minidom to collect its 'merchant' elements.
# NOTE(review): `output` is opened without `with` and is not closed in the
# visible code; `file` shadows the Python 2 builtin.
import urllib2 from xml.dom.minidom import parseString import re import csv from UnicodeWriter import UnicodeWriter from htmlentitydefs import name2codepoint pattern = re.compile(r'&(?:(#)(\d+)|([^;]+));') output = open("merchant_category_information.csv", "wb") UnicodeWriterObj = UnicodeWriter(output) #csv_writer = csv.writer(output) UnicodeWriterObj.writerow(['Updated On', 'Merchant id', 'Merchant Name', 'averageConversionRate', 'averageCommission', 'logo', 'has_logo', 'category_id', 'category_name', 'domain_id', 'domain_name', 'country']) for i in range(1, 624): info_pull_url = 'http://api-merchants.skimlinks.com/merchants/xml/639099dd7e85abb8717d17662901ecae/category/'+str(i)+'/limit/100000000' file = urllib2.urlopen(info_pull_url) data = file.read() file.close() dom = parseString(data) xmlTag = dom.getElementsByTagName('merchant')#[0].toxml()
# Script fragment (`br` is created outside this view): finishes configuring
# the mechanize browser (gzip, redirects, referer, no robots.txt handling,
# refresh capped at 1 second, custom User-agent), opens a CSV named after
# today's date and writes its header row. main() passes the landing page and
# then index100.html to index400.html (step 100) to ExtractLandingURL, which
# is defined outside this view.
# NOTE(review): `file` shadows the Python 2 builtin and is not closed in the
# visible code.
br.set_handle_gzip(True) br.set_handle_redirect(True) br.set_handle_referer(True) br.set_handle_robots(False) # Follows refresh 0 but not hangs on refresh > 0 br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1) # User-Agent (this is cheating, ok?) br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux \ i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')] # The site we will navigate into, handling it's session today = time.strftime('%Y_%m_%d') file=open('craigsList_albanyga'+today+'.csv','wb') UnicodeWriterObj = UnicodeWriter(file) UnicodeWriterObj.writerow(['URL','Name','Location', 'Email', 'Description','Date','Phone']) def main(): #ExtractDetails('http://raleigh.craigslist.org/apa/3275803941.html') CreatedURL = 'http://albanyga.craigslist.org/apa/' ExtractLandingURL(str(CreatedURL)) for i in range(100, 500, 100): SummaryURL = 'http://albanyga.craigslist.org/apa/index'+str(i)+'.html' print "*"*45 print SummaryURL print "*"*45 ExtractLandingURL(str(SummaryURL))
def __init__(self, year, month, omitZeroValues, onCompleteCallback):
    """Run the whole export for one year/month.

    Args:
        year: Year to export; compared as a string with the folder names
            found under ``../files``.
        month: Month to export; compared as a string with the folder
            names found under ``../files/<year>``.
        omitZeroValues: When falsy the output file name gets a
            ``:with-zeros`` suffix.
        onCompleteCallback: Called without arguments as the last step
            when truthy.

    Raises:
        KeyError: If one of the district codes listed in step 4 was not
            produced by step 3.
    """
    # null everything
    self.areaLookup = ""
    self.crimeLookup = ""
    self.timeLookup = ""
    self.writer = ""
    self.areaWriter = ""
    self.crimeWriter = ""
    self.timeWriter = ""
    self.recordId = 1
    # Fresh containers per instance, so that one run never sees the
    # district sheets collected by another one.
    self.districtCrimeDataSheets = []
    self.districtCrimeDataSheetsByCode = {}

    self.year = year
    self.month = month
    self.omitZeroValues = omitZeroValues
    self.onCompleteCallback = onCompleteCallback

    # 1) a) generate area lookup
    with xlrd.open_workbook("areaLookup.xls") as wb:
        self.areaLookup = AreaLookup(wb.sheet_by_index(0))
    # b) time lookup
    with xlrd.open_workbook("timeLookup.xls") as wb:
        self.timeLookup = TimeLookup(wb.sheet_by_index(0))

    # 2) generate lookups
    # area lookup
    if self.generateAreaLookup:
        with open("../generated/AreaLookup.csv", "wb") as f:
            lookupRows = self.areaLookup.generate()
            self.areaWriter = UnicodeWriter(f)
            for lookupRow in lookupRows:
                self.areaWriter.writerow(lookupRow)

    # crime lookup - is generated manually
    if self.generateCrimeLookup:
        # The second positional argument of open_workbook is the log file
        # object, not a file mode, so no "wb" here.
        with xlrd.open_workbook("../files/a______.xls") as wb:
            self.crimeLookup = CrimeLookup(wb.sheet_by_index(0))
        with open("../generated/CrimeLookup.csv", "wb") as f:
            # NOTE(review): the rows are generated but nothing is written,
            # so this only truncates the file; the writing code was
            # disabled in the original.
            self.crimeLookup.generate()

    # time lookup
    if self.generateTimeLookup:
        with open("../generated/TimeLookup.csv", "wb") as f:
            lookupRows = self.timeLookup.generate()
            self.timeWriter = UnicodeWriter(f)
            for lookupRow in lookupRows:
                self.timeWriter.writerow(lookupRow)

    if self.generateCrimeData:
        # 3) walk ../files/<year>/<month>/ and process every workbook
        directory = "../files"
        for yearFolder in listdir(directory):
            # check if is year we're interested in
            if str(yearFolder) != str(self.year):
                continue
            yearPath = directory + "/" + yearFolder
            for monthFolder in listdir(yearPath):
                # check if is month we're interested in
                if str(monthFolder) != str(self.month):
                    continue
                monthPath = yearPath + "/" + monthFolder
                workbookNames = listdir(monthPath)
                # get time period id
                periodId = self.timeLookup.getTimeIdByYearAndMonth(
                    int(yearFolder), int(monthFolder)
                )
                for workbookName in workbookNames:
                    # check only for excel files (".xlsx" also contains
                    # ".xls")
                    if ".xls" not in workbookName:
                        continue
                    # omit files with underscore markers
                    if (
                        "__L" in workbookName
                        or "__R" in workbookName
                        or "__X" in workbookName
                    ):
                        continue
                    districtSheet = self.processFile(
                        monthPath + "/" + workbookName, periodId
                    )
                    if districtSheet:
                        self.districtCrimeDataSheetsByCode[districtSheet.code] = districtSheet

        # Steps 4-6 consume what step 3 collected, so they are only run
        # together with it.
        # 4) add Letiste (airport) sheets to respective districts
        transports = [
            {"from": "x004110", "to": "0011"},
            {"from": "x064160", "to": "0602"},
            {"from": "x074170", "to": "0704"},
            {"from": "x174150", "to": "1706"},
            {"from": "x194130", "to": "1903"},
            # add train stations
            {"from": "x060050", "to": "0602"},
            {"from": "x070050", "to": "0707"},
        ]
        for transport in transports:
            baseDistrictSheet = self.districtCrimeDataSheetsByCode[transport["to"]]
            addingDistrictSheet = self.districtCrimeDataSheetsByCode[transport["from"]]
            baseDistrictSheet.addDistrictCrimeDataSheet(addingDistrictSheet)

        # 5) generate all rows from district crime data
        rows = self.generate()
        # don't need all the objects any more
        self.clear()

        # 6) write to csv file
        fileName = str(self.year) + ":" + "01-" + str(self.month)
        if not self.omitZeroValues:
            fileName = fileName + ":with-zeros"
        with open("../generated/crimeData-" + fileName + ".csv", "wb") as f:
            print("start writing file " + unicode(fileName))
            self.writer = UnicodeWriter(f)
            # The header (COLUMN_NAMES) is intentionally not written.
            for row in rows:
                self.writer.writerow(row)

    # complete callback
    if self.onCompleteCallback:
        self.onCompleteCallback()
# Fragment (the enclosing loop header is outside this view): capitalises the
# first word of the Czech (crime[1]) and English (crime[3]) crime names,
# collects [row[0], czechCrime, englishCrime, row[3]] in finalRows and then
# writes every collected row to 'crimeLookup2.csv', decoding each column from
# UTF-8 first. When crime == -1 the first two columns of the row are printed.
# NOTE(review): englishCrime is not assigned in the visible code when
# crime == -1 - presumably initialised above; verify.
czechCrime = "" if crime != -1: englishCrime = crime[3] a = englishCrime.split(" ") a[0] = a[0].capitalize() englishCrime = " ".join(a) czechCrime = crime[1] a = czechCrime.split(" ") a[0] = a[0].capitalize() czechCrime = " ".join(a) else: print row[0], row[1] finalRow = [row[0], czechCrime, englishCrime, row[3]] finalRows.append(finalRow) with open("crimeLookup2.csv", "wb") as csvfile: writer = UnicodeWriter(csvfile) for row in finalRows: rowArray = [] for column in row: #print column rowArray.append(unicode(column.decode("utf-8"))) writer.writerow(rowArray)