def getFileInfo(self, filename):
    '''
    Return general information about the given [filename], like its size.

    The result is a tab-aligned, multi-line text containing the file name,
    the folder, the file suffix, a human readable file size and the date
    of the last modification (all taken from QtCore.QFileInfo).
    '''
    f = QtCore.QFileInfo(filename)
    size = f.size()
    size_type = ['Byte', 'kB', 'MB', 'GB', 'TB', 'PB']
    size_index = 0
    # '>=' so that exactly 1024 Byte becomes 1 kB; the index bound keeps
    # absurdly large values in the last unit instead of raising IndexError
    while size >= 1024 and size_index < len(size_type) - 1:
        size /= 1024.0
        size_index += 1
    # '%.3g' falls back to exponent notation ('1e+03') as soon as the value
    # rounds to four digits, so those values are printed as plain integers
    if size < 999.5:
        number = '%.3g' % size
    else:
        number = '%d' % round(size)
    size = '%s %s' % (number, size_type[size_index])
    n = f.fileName()
    path = f.filePath()
    # cut the file name off the end of the path; a positive end index is
    # used because [:-len(n)] would return an empty folder for an empty name
    folder = path[:len(path) - len(n)]
    # one field per line
    template = '\n'.join((
        'File name:\t%s',
        'Folder:\t%s',
        'File type:\t%s',
        'File size:\t%s',
        'Last changed:\t%s'))
    return template % (n, folder, f.suffix(), size,
                       f.lastModified().toString())

#TODO: not used at the moment
#but useful
#     def getMetaData(self, filename):
#         '''
#         Find and format the meta data of [filename]
#         '''
#         text = ""
#         if filename.filetype().lower() not in ('tif', 'tiff'):
#             #for all files except TIF
#             filename, realname = unicodeFilename(filename), filename
#             parser = createParser(filename, realname)
#             if not parser:
#                 print "Unable to parse file"
#             try:
#                 metadata = extractMetadata(parser)
#             except HachoirError, err:
#                 #AttributeError because hachoir uses
#                 print "Metadata extraction error: %s" % unicode(err)
#                 metadata = None
#             if not metadata:
#                 print "Unable to extract metadata"
#             else:
#                 text = "\n".join(metadata.exportPlaintext())
#         else:
#             #for TIF images
#             import exifread
#             # Open image file for reading (binary mode)
#             f = open(filename, 'rb')
#             # Return Exif tags
#             tag_dict = exifread.process_file(f)
#             for key, value in tag_dict.iteritems():
#                 text += "%s: %s\n" %(key, value)
#         return text
def open(self, filename):
    '''
    Read the separated-value text file [filename] into numpy array(s).

    All settings (read buffer size, line step, start/stop line, separator,
    column filter, x column, meaning of the first line, dtype) are taken
    from self.preferences.

    Returns a tuple (arrays, labels):
      * arrays - [data] if there is only one y column, otherwise a tuple
                 with one array per y column; each of those arrays is
                 [x, y] if an x column is set, else just the y values
      * labels - the names found in the first line (without the x column)
                 if the first line is used for plot names, else [filename]

    Raises Exception('no columns given') if the column filter is active
    but selects no column.

    Lines that cannot be stored in the array are skipped and counted.
    Reading stops early when self.canceled becomes true.
    '''
    prefs = self.preferences
    #VARIABLES:
    buff = prefs.pBuffer.value()
    step = prefs.pReadEveryNLine.value()
    stop_at_line = prefs.pHasStopLine.value()
    stop_n = prefs.pStopLine.value()
    x_col = prefs.pXColumn.value()
    has_x_col = prefs.pHasXColumn.value()
    # position of the x column inside one (filtered) row; differs from
    # x_col as soon as the column filter reorders/removes columns
    x_pos = x_col

    self.n_line = 0  # number of current line
    step_len = 0  # number of lines in the already processed file pieces
    n_defective_lines = 0  # number of corrupted lines
    i = 0  # current index in data array
    at_end = False  # is the stop line reached
    labels = filename

    #COUNT LINES:
    if stop_at_line:
        self.nLines = stop_n
    else:
        modified = QtCore.QFileInfo(filename).lastModified().toString()
        #ONLY IF THIS IS A NEW FILE OR THE FILE WAS MODIFIED:
        if not (self._lastFilename == filename
                and self._lastModified == modified):
            self.nLines = countLines(filename, buff)
            self._lastnLines = self.nLines
            self._lastFilename = filename
            self._lastModified = modified
        else:
            self.nLines = self._lastnLines

    with open(filename, 'r') as f:
        #FIND SEPARATOR:
        if prefs.pFindSeparator.value():
            prefs.pSeparator.setValue(self._findSeparator(f))
        separator = prefs.separators[prefs.pSeparator.value()]

        #DEFINE NUMBER OF COLUMNS:
        n_col = f.readline().count(separator) + 1
        f.seek(0)  # back to begin

        #FILTER COLUMNS:
        filter_columns = prefs.pFilterColumns.value()
        if filter_columns:
            c = [ch.value()
                 for ch in prefs.pFilterColumns.children()[1:]][:n_col]
            #ADD X COLUMN TO THE BEGINNING:
            if has_x_col:
                if x_col not in c:
                    c.insert(0, x_col)
                x_pos = c.index(x_col)
            if not c:
                # itemgetter() without arguments would raise a TypeError
                raise Exception('no columns given')
            n_col = len(c)
            col_filter = itemgetter(*c)
        elif n_col > 1:
            col_filter = lambda l: l[:n_col]
        else:
            col_filter = lambda l: l[0]

        #GET FIRST LINE NAMES:
        fline = prefs.pFirstLine.value()
        if fline != '-':
            # strip the line break, else it ends up in the last name
            names = col_filter(f.readline().rstrip('\r\n').split(separator))
            # col_filter returns a tuple, a list or (one column) a string
            if isinstance(names, (list, tuple)):
                names = list(names)
            else:
                names = [names]
            if fline == 'axes names':
                for ax, name in zip(self.display.axes, names):
                    ax.p.setValue(name)
            else:
                #plot names:
                labels = names
                if has_x_col and x_pos < len(labels):
                    labels.pop(x_pos)

        #JUMP TO START POSITION:
        startline = prefs.pStartLine.value()
        if startline:
            # readline(size) only limits the number of read bytes,
            # so the lines have to be skipped one by one
            for _ in range(startline):
                if not f.readline():
                    break
            self.n_line = startline

        #PRINT FIRST 10 LINES:
        if prefs.pPrintFirstLines.value():
            startPos = f.tell()
            print('==========')
            # readlines(sizehint) counts bytes, not lines - use readline()
            for k in range(min(self.nLines, 10)):
                line = f.readline()
                if not line:
                    break
                print('[%s]: %s' % (self.n_line + k, line))
                print('--> %s\n' % str(col_filter(line.split(separator))))
            f.seek(startPos)
            print('==========')

        #CREATE DATA ARRAY:
        # rounded up and one spare row, because the line on which the
        # stop criterion triggers is stored as well
        n_rows = (self.nLines + step - 1) // step + 1
        if n_col > 1:
            shape = (n_rows, n_col)
        else:
            shape = n_rows
        data = np.empty(shape=shape, dtype=prefs.dtypes[prefs.pDType.value()])

        #MAIN LOOP:
        while not self.canceled:
            #READ PART OF THE FILE:
            file_piece = f.readlines(buff)
            l = len(file_piece)
            if not l:
                break
            # continue the every-[step]-line pattern of the last piece
            # instead of restarting it at the first line of every piece
            offset = -step_len % step
            for n, line in enumerate(file_piece[offset::step]):
                # line index counted from the start of the main loop
                self.n_line = step_len + offset + n * step
                #ENLARGE ARRAY IF MORE LINES THAN EXPECTED ARE READ:
                if i >= len(data):
                    grown = np.empty((2 * len(data) + 1,) + data.shape[1:],
                                     dtype=data.dtype)
                    grown[:i] = data[:i]
                    data = grown
                #FILTER COLUMNS AND ADD LINE TO ARRAY:
                try:
                    data[i] = col_filter(line.split(separator))
                    i += 1
                except (ValueError, IndexError):
                    # ValueError: not convertible / wrong number of values
                    # IndexError: line has less columns than the filter needs
                    n_defective_lines += 1
                #CHECK BREAK CRITERIA:
                if stop_at_line and self.n_line >= stop_n:
                    at_end = True
                    break
            if at_end:
                break
            step_len += l

    data = data[:i]
    print('%s lines were corrupted' % n_defective_lines)

    #SPLIT ARRAY IF NEEDED:
    if (has_x_col and n_col > 2) or (not has_x_col and n_col > 1):
        if has_x_col:
            x = data[:, x_pos]
            #GET Y COLUMNS THROUGH REMOVING THE X COLUMN:
            y_cols = np.c_[data[:, :x_pos], data[:, x_pos + 1:]]
            #CREATE TUPLE OF [x, y_n] ARRAYS:
            arrays = [np.c_[x, y_cols[:, n]]
                      for n in range(y_cols.shape[1])]
        else:
            #CREATE TUPLE OF [y_n] ARRAYS:
            arrays = [data[:, n] for n in range(data.shape[1])]
        return tuple(arrays), labels
    return [data], labels