def parse_expect(self, input, expected_result, **kw):
    """Parse *input* line by line and assert the collected records.

    A fresh ``csv.parser`` is created and every keyword argument is set
    on it as an attribute.  *input* is split on ``'\\n'`` and each line
    is handed to ``parser.parse``; falsy results (no complete record
    yet) are skipped, the rest are collected in order.  The collected
    list is then compared with *expected_result* via
    ``self.assertEqual``.
    """
    parser = csv.parser()
    for kw_arg, kw_value in kw.items():
        setattr(parser, kw_arg, kw_value)

    result = []
    # str.split replaces the deprecated string.split() module function,
    # which no longer exists on Python 3; the result is identical.
    for line in input.split('\n'):
        fields = parser.parse(line)
        if not fields:
            continue
        result.append(fields)

    self.assertEqual(expected_result, result)
def iif_to_list(fd):
    """Read tab-separated records from *fd* and return them as a list.

    Each line read from *fd* is passed to a ``csv.parser`` configured
    with ``field_sep='\\t'``.  Lines for which the parser returns a
    falsy value are skipped; every other result is appended to the
    returned list in file order.

    *fd* is closed before returning, including when reading or parsing
    raises.
    """
    parser = csv.parser(field_sep='\t')
    rows = []
    try:
        while True:
            # Read from the argument; the previous code referenced an
            # undefined name ``ifile`` here.
            line = fd.readline()
            if not line:
                break
            fields = parser.parse(line)
            if not fields:
                continue
            rows.append(fields)
    finally:
        fd.close()
    return rows
def __init__(self, master):
    """Build the map canvas and load the node and segment data.

    Creates a frame under *master* containing a canvas that shows the
    image loaded from ``map_file`` anchored at the top-left corner,
    binds ``<Button-1>`` to ``self.mousedown``, and then reads
    ``nodes_file`` and ``segments_file``.  The first line of each data
    file is read and discarded as a header.

    After construction ``self.nodes`` maps each node id (kept exactly
    as parsed from the file) to an ``(x, y)`` tuple of ints, and
    ``self.segments`` is a list of ``(start, end)`` tuples of ints.  An
    oval is drawn on the canvas for every node.
    """
    self.master = master
    self.f1 = Frame(self.master)
    self.canvas = Canvas(self.f1, width=width, height=height,
                         background="#ffffff")

    self.img = Image.open(map_file)
    self.pimg = ImageTk.PhotoImage(self.img)
    # self.img is rebound to whatever create_image returns; the
    # PhotoImage itself stays reachable through self.pimg.
    self.img = self.canvas.create_image(0, 0, anchor=NW, image=self.pimg)
    self.canvas.bind("<Button-1>", self.mousedown)

    p = csv.parser()

    self.nodes = {}
    n_file = open(nodes_file)
    try:
        n_file.readline()  # discard the header line
        while 1:
            line = n_file.readline()
            if not line:
                break
            fields = p.parse(line)
            if not fields:
                # Nothing parsed from this line (e.g. a blank line);
                # unpacking it below would raise.
                continue
            (idx, x, y) = fields
            x = int(x)
            y = int(y)
            # NOTE(review): node ids are stored as parsed (not converted
            # with int) while segment endpoints below are ints -- confirm
            # that lookups elsewhere account for this.
            self.nodes[idx] = (x, y)
            self.canvas.create_oval(x - node_radius, y - node_radius,
                                    x + node_radius, y + node_radius,
                                    fill=node_color,
                                    outline=node_outline_color)
    finally:
        n_file.close()

    self.segments = []
    s_file = open(segments_file)
    try:
        s_file.readline()  # discard the header line
        while 1:
            line = s_file.readline()
            if not line:
                break
            fields = p.parse(line)
            if not fields:
                continue
            (start, end) = fields
            self.segments.append((int(start), int(end)))
    finally:
        s_file.close()

    self.canvas.pack()
    self.l = Label(self.f1, text="foo")
    self.l.pack()
    self.f1.pack()
def __init__(self, master):
    """Build the map canvas and load the node and segment data.

    Creates a frame under *master* containing a canvas that shows the
    image loaded from ``map_file`` anchored at the top-left corner,
    binds ``<Button-1>`` to ``self.mousedown``, and then reads
    ``nodes_file`` and ``segments_file``.  The first line of each data
    file is read and discarded as a header.

    After construction ``self.nodes`` maps each node id (kept exactly
    as parsed from the file) to an ``(x, y)`` tuple of ints, and
    ``self.segments`` is a list of ``(start, end)`` tuples of ints.  An
    oval is drawn on the canvas for every node.
    """
    self.master = master
    self.f1 = Frame(self.master)
    self.canvas = Canvas(self.f1, width=width, height=height,
                         background="#ffffff")

    self.img = Image.open(map_file)
    self.pimg = ImageTk.PhotoImage(self.img)
    # self.img is rebound to whatever create_image returns; the
    # PhotoImage itself stays reachable through self.pimg.
    self.img = self.canvas.create_image(0, 0, anchor=NW, image=self.pimg)
    self.canvas.bind("<Button-1>", self.mousedown)

    p = csv.parser()

    self.nodes = {}
    n_file = open(nodes_file)
    try:
        n_file.readline()  # discard the header line
        while 1:
            line = n_file.readline()
            if not line:
                break
            fields = p.parse(line)
            if not fields:
                # Nothing parsed from this line (e.g. a blank line);
                # unpacking it below would raise.
                continue
            (idx, x, y) = fields
            x = int(x)
            y = int(y)
            # NOTE(review): node ids are stored as parsed (not converted
            # with int) while segment endpoints below are ints -- confirm
            # that lookups elsewhere account for this.
            self.nodes[idx] = (x, y)
            self.canvas.create_oval(x - node_radius, y - node_radius,
                                    x + node_radius, y + node_radius,
                                    fill=node_color,
                                    outline=node_outline_color)
    finally:
        n_file.close()

    self.segments = []
    s_file = open(segments_file)
    try:
        s_file.readline()  # discard the header line
        while 1:
            line = s_file.readline()
            if not line:
                break
            fields = p.parse(line)
            if not fields:
                continue
            (start, end) = fields
            self.segments.append((int(start), int(end)))
    finally:
        s_file.close()

    self.canvas.pack()
    self.l = Label(self.f1, text="foo")
    self.l.pack()
    self.f1.pack()
def convert(self):
    """Convert the rows of the input CSV file to XML and write them out.

    Writes to ``self.outputFile``, in order: the three header strings,
    the opening root tag, one ``self.rowname`` element per record
    parsed from ``self.inputFile``, and the closing root tag.  Inside
    a row element every field becomes a child element whose tag name
    is looked up in ``self.fieldnames`` under the keys ``'field0'``,
    ``'field1'``, ... in field order.

    In field text ``&`` is written as ``&amp;`` and ``£`` as the
    character reference ``&#163;`` so the output stays well-formed.
    Lines for which the parser returns ``None`` produce no output.

    Both files are closed before returning, including on error.  The
    lookup in ``self.fieldnames`` fails if a record has more fields
    than there are ``'fieldN'`` entries.
    """
    output = open(self.outputFile, 'w')
    try:
        output.write(self.header1 + self.header2 + self.header3)
        if self.debug:
            # Single-argument print() behaves the same on Python 2 and 3;
            # the double space reproduces the old ``print a, b`` output.
            print("convert: writing to  %s" % (self.outputFile,))
            print("convert: reading fm:  %s" % (self.inputFile,))
        output.write(self.OPEN_START + self.rootname + self.CLOSE)  # ROOT tag

        p = csv.parser()
        f = open(self.inputFile, 'r')
        try:
            # Read and parse the input until a full record is in hand.
            while 1:
                line = f.readline()
                if not line:
                    break
                rec = p.parse(line)
                if rec is None:
                    continue
                output.write(self.OPEN_START + self.rowname + self.CLOSE
                             + self.NEWLINE)  # <entry>
                for idx, fld in enumerate(rec):
                    # Escape '&' first so the '&' introduced by the
                    # pound-sign reference is not escaped a second time.
                    nfld = fld.replace("&", "&amp;").replace("£", "&#163;")
                    tag = self.fieldnames['field' + str(idx)]
                    output.write(self.INDENT + self.OPEN_START + tag
                                 + self.CLOSE)
                    output.write(nfld)
                    output.write(self.OPEN_END + tag + self.CLOSE
                                 + self.NEWLINE)
                output.write(self.OPEN_END + self.rowname + self.CLOSE
                             + self.NEWLINE)  # </entry>
        finally:
            f.close()

        output.write(self.OPEN_END + self.rootname + self.CLOSE)  # ROOT tag
    finally:
        output.close()
''' Takes the file object called "fd" and returns a list of title events, e.g. [(35, "Alchemy"), (2003, "Banana")]''' ret = [] # ASSUME that no revision is more than 64MiB long :-) SLURPSIZE = 65536 * 1024 chunk = fd.read(SLURPSIZE) # a "character array", not decoded Unicode offset = 0 # now we get to use multiline regexes while chunk: title_pos = title_position(chunk) if title_pos > -1: title = title_string(chunk) # FIXME: call can be optimized to include title_pos start = title_pos length = 0 # who cares?, so long as we don't match it next time ret.append((offset + title_pos, title)) chunk = chunk[title_pos:] offset += title_pos chunk += fd.read(SLURPSIZE - len(chunk)) else: return ret # we're done if __name__ == '__main__': import sys import csv p = csv.parser() values = fd2lists(sys.stdin) for vals in values: print p.join(vals)
#!/usr/bin/python # Hacking at XML with regular expressions is great fun. if __name__ == '__main__': import sys import csv titleparser = csv.parser() revparser = csv.parser() try: titles = open(sys.argv[1]) revs = open(sys.argv[2]) except: print "Usage: %s title-file.csv revision-file.csv" % sys.argv[0] raise AssertionError() titledata = [] for line in titles.xreadlines(): offset, title = titleparser.parse(line) titledata.append((int(offset), title)) for line in revs.xreadlines(): offset, length, eyedee = revparser.parse(line) offset, length = int(offset), int(length) # if the offset is less than the first titledata, then we're doomed if offset < titledata[0][0]: raise AssertionError("OMG") # Now we know it's >= titledata[0][0] # if it's > titledata[1][0], then we should pop titledata elif len(titledata) > 1 and offset > titledata[1][0]: title = titledata.pop(0)[1] else:
def csv2numeric(self, filename, types):
    """Open a CSV file and read it into named Numeric arrays.

    The file must have the names of the columns on the first line.
    *types* is a sequence of callables used to convert the columns,
    e.g. for a file like::

        a,b
        1,2.3
        2,5.6
        3,7.84

    you would do ``data = self.csv2numeric("file.csv", [int, float])``
    and ``data`` would look like
    ``{'a': array([1 2 3]), 'b': array([2.3 5.6 7.84])}``.

    It can really only handle numerical datatypes: a field that its
    converter rejects is stored unconverted.

    *filename* is resolved against ``self.uri_base`` unless it starts
    with ``http:``, ``/`` or ``file:``.  Fields beyond the number of
    header columns are ignored.  An input with no records yields an
    empty dict.  ``IndexError`` is raised when a row has more fields
    than *types* has entries.
    """
    if not re.match(r"(http:|\/|file:)", filename):
        filename = urlparse.urljoin(self.uri_base, filename)

    source = urllib.urlopen(filename)
    try:
        # Choose the csv API up front instead of wrapping the whole parse
        # in ``except AttributeError``: that also caught unrelated errors
        # mid-parse and then re-ran the fallback over half-filled lists.
        if hasattr(csv, 'parser'):
            # Older csv module that exposes csv.parser.
            rows = []
            p = csv.parser()
            while 1:
                line = source.readline()
                if not line:
                    break
                fields = p.parse(line)
                if fields:
                    rows.append(fields)
        else:
            # Standard-library csv module (Python 2.3 and later).
            rows = list(csv.reader(source))
    finally:
        source.close()

    if not rows:
        return {}

    headers = list(rows[0])
    arrays = [[] for name in headers]
    for fields in rows[1:]:
        for i in range(min(len(fields), len(arrays))):
            typecode = types[i]
            try:
                value = typecode(fields[i])
            except Exception:
                # Best effort: keep the raw field when it cannot be
                # converted.
                value = fields[i]
            arrays[i].append(value)

    results = {}
    for name, column in zip(headers, arrays):
        results[name] = array(column)
    return results
def parse_exception(self, input, exception, **kw):
    """Assert that parsing *input* raises *exception*.

    A new ``csv.parser`` is created, each keyword argument is applied
    to it as an attribute, and ``self.assertRaises`` is used to check
    that ``parse(input)`` raises *exception*.
    """
    configured = csv.parser()
    for option in kw:
        setattr(configured, option, kw[option])
    parse_call = configured.parse
    self.assertRaises(exception, parse_call, input)
def join_expect(self, input, expected_result, **kw):
    """Assert that joining *input* produces *expected_result*.

    A new ``csv.parser`` is created, each keyword argument is applied
    to it as an attribute, and the value returned by ``join(input)``
    is compared with *expected_result* through ``self.assertEqual``.
    """
    configured = csv.parser()
    for option in kw:
        setattr(configured, option, kw[option])
    self.assertEqual(expected_result, configured.join(input))
def csv2numeric(self, filename, types):
    """Open a CSV file and read it into named Numeric arrays.

    The file must have the names of the columns on the first line.
    *types* is a sequence of callables used to convert the columns,
    e.g. for a file like::

        a,b
        1,2.3
        2,5.6
        3,7.84

    you would do ``data = self.csv2numeric("file.csv", [int, float])``
    and ``data`` would look like
    ``{'a': array([1 2 3]), 'b': array([2.3 5.6 7.84])}``.

    It can really only handle numerical datatypes: a field that its
    converter rejects is stored unconverted.

    *filename* is resolved against ``self.uri_base`` unless it starts
    with ``http:``, ``/`` or ``file:``.  Fields beyond the number of
    header columns are ignored.  An input with no records yields an
    empty dict.  ``IndexError`` is raised when a row has more fields
    than *types* has entries.
    """
    if not re.match(r"(http:|\/|file:)", filename):
        filename = urlparse.urljoin(self.uri_base, filename)

    source = urllib.urlopen(filename)
    try:
        # Choose the csv API up front instead of wrapping the whole parse
        # in ``except AttributeError``: that also caught unrelated errors
        # mid-parse and then re-ran the fallback over half-filled lists.
        if hasattr(csv, 'parser'):
            # Older csv module that exposes csv.parser.
            rows = []
            p = csv.parser()
            while 1:
                line = source.readline()
                if not line:
                    break
                fields = p.parse(line)
                if fields:
                    rows.append(fields)
        else:
            # Standard-library csv module (Python 2.3 and later).
            rows = list(csv.reader(source))
    finally:
        source.close()

    if not rows:
        return {}

    headers = list(rows[0])
    arrays = [[] for name in headers]
    for fields in rows[1:]:
        for i in range(min(len(fields), len(arrays))):
            typecode = types[i]
            try:
                value = typecode(fields[i])
            except Exception:
                # Best effort: keep the raw field when it cannot be
                # converted.
                value = fields[i]
            arrays[i].append(value)

    results = {}
    for name, column in zip(headers, arrays):
        results[name] = array(column)
    return results