def import_csv(self, request):
    if request.method == "POST":
        try:
            csv_file = TextIOWrapper(request.FILES['csv_file'].file,
                                     encoding=request.encoding)
            dialect = csv.Sniffer().sniff(csv_file.read(1024))
            csv_file.seek(0)
            reader = csv.DictReader(csv_file, dialect=dialect)
        except Exception as err:
            self.message_user(request, "Error: {}".format(err))
            return redirect("..")
        try:
            if '/student/' in request.path:
                user_type = Student
            elif '/faculty/' in request.path:
                user_type = Faculty
            else:
                raise Http404
            create_users(user_type, reader)
        except Exception as err:
            messages.error(request,
                           f'Error on row number {reader.line_num}: {err}')
        else:
            messages.success(request, "Your csv file has been imported")
        return redirect("..")
    form = BulkImportForm()
    payload = {"form": form}
    return render(request, "admin/bulk_import_form.html", payload)
def importData(dbase, filename, user):
    """Function called by Gramps to import data on persons in CSV format."""
    if dbase.get_feature("skip-import-additions"):  # don't add source or tags
        parser = CSVParser(dbase, user, None)
    else:
        parser = CSVParser(dbase, user,
                           (config.get('preferences.tag-on-import-format')
                            if config.get('preferences.tag-on-import')
                            else None))
    try:
        with open(filename, 'rb') as filehandle:
            line = filehandle.read(3)
            if line == codecs.BOM_UTF8:
                filehandle.seek(0)
                filehandle = TextIOWrapper(filehandle, encoding='utf_8_sig',
                                           errors='replace', newline='')
            else:
                # just open with OS encoding
                filehandle.seek(0)
                filehandle = TextIOWrapper(filehandle, errors='replace',
                                           newline='')
            parser.parse(filehandle)
    except EnvironmentError as err:
        user.notify_error(_("%s could not be opened\n") % filename, str(err))
        return
    return ImportInfo({_("Results"): _("done")})
def extract_csv(self):
    assert self.extension in ['csv']
    self.content = self.file.read()
    self.properties['filename'] = self.doc_name.split('/')[-1]
    self.properties['extension'] = self.get_extension()
    wrapper = TextIOWrapper(codecs.getreader("utf-8")(self.file))
    wrapper.seek(0, 0)
    reader = csv.DictReader(wrapper)
    data_columns = []
    file_delimiter = ','
    for row in reader:
        items = dict(row)
        # If the header was not split on ',', the whole header line ends up in
        # a single key containing ';', so the file is ';'-delimited.
        if any(';' in key for key in items):
            file_delimiter = ';'
        data_columns = list(items.keys())
        if file_delimiter == ';':
            data_columns = data_columns[0].split(';')
        break  # only the first row is needed to inspect the header
    row_count = sum(1 for row in reader)
    self.properties['content'] = self.content.decode('utf-8')[:500]
    self.properties["nb_rows"] = row_count
    self.properties["file_delimiter"] = file_delimiter
    self.properties["header_columns"] = data_columns
    return self.properties
def readback(f: io.TextIOWrapper) -> None:
    """Scan backwards from the current position and leave the cursor just
    after the previous newline, i.e. at the start of the current line."""
    pos = f.tell() - 2  # step back over the newline that ends the current line
    while pos >= 0:
        f.seek(pos)
        if f.read(1) == "\n":
            break
        pos -= 1  # walk back one character at a time so no byte is skipped
def struncate(file: TextIOWrapper, amount: int):
    """
    Truncate the first n bytes from the beginning of file

    :param file
    :param amount: amount of bytes to remove from start
    :type file: TextIOWrapper
    :type amount: int
    """
    # Get file size
    file.seek(0, 2)
    file_size = file.tell()
    # Go to the beginning of file
    file_offset = amount
    file.seek(0, 0)
    bytes_to_write = file_size - amount
    bytes_written = 0
    while bytes_written < bytes_to_write:
        # Move to offset + bytes_written
        file.seek(file_offset + bytes_written, 0)
        # Get bytes to rewrite
        block_size = 1024
        if bytes_to_write - bytes_written < block_size:
            block_size = bytes_to_write - bytes_written
        # Read block
        block_data = file.read(block_size)
        # Move to the beginning of file + bytes_written
        file.seek(bytes_written, 0)
        # Write block
        bytes_written += file.write(block_data)
    # Then truncate
    file.flush()  # Flush write first
    file.seek(bytes_written)
    file.truncate()
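# Hedged usage sketch for struncate(): trimming the oldest bytes of a log so
# it stays under a size cap. "events.log" and the 4 KiB limit are made-up
# values, and the file must already exist and be opened read/write ("r+").
MAX_LOG_SIZE = 4096

with open("events.log", "r+") as log_file:
    log_file.seek(0, 2)            # measure the current size
    size = log_file.tell()
    if size > MAX_LOG_SIZE:
        # Drop the oldest bytes so only the newest MAX_LOG_SIZE remain.
        struncate(log_file, size - MAX_LOG_SIZE)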
def parse_file(fileList, selected_fleet):
    DataCollection.objects.create(
        name=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    for i in fileList:
        file = fileList[i]
        fp = TextIOWrapper(file.open(), encoding='utf-8')
        fp.seek(0)
        content = list(
            csv.DictReader(fp, delimiter=',',
                           fieldnames=[
                               'time', 'MCU', 'number', 'model name',
                               'amp hour', 'battery volts', 'hours', 'miles'
                           ],
                           restkey='extrafields'))
        x = 0
        for row in content:
            f = CartFleet.objects.filter(name=selected_fleet)
            c = Cart.objects.create(number=row['number'], cart_fleet=f[0])
            if x == 0:
                DataCol = DataCollection.objects.create(name=row['time'])
            d = DataSet.objects.create(collection=DataCol,
                                       collection_date=datetime.datetime.now(),
                                       amp_hours=row['amp hour'],
                                       mileage=row['miles'],
                                       hours=row['hours'],
                                       cart=c)
            for column in range(6, len(row['extrafields']), 19):
                if row['extrafields'][column] != 0:
                    Fault.objects.create(code=row['extrafields'][column],
                                         hour=row['extrafields'][column + 2],
                                         cart=c)
                else:
                    break
            x = x + 1
def get_reader(handle: io.TextIOWrapper):
    logger.debug(f"Reading '{handle.name}' content")
    sniffer = csv.Sniffer()
    dialect = sniffer.sniff(handle.read(2048))
    handle.seek(0)
    return csv.reader(handle, dialect=dialect)
def validate_file_for_import(csv_file: io.TextIOWrapper, logger,
                             detailed=False, strict=False) -> int:
    num_errors = 0
    num_valid_lines = 0
    csv_file.seek(0)  # set to start of file in case it has been read earlier
    line: str = csv_file.readline()
    while line and line != "":
        try:
            printable_line = line
            if detailed:
                # do not print passwords
                printable_line = line.split(",")[0]
                UserCommand._validate_user_or_throw(line, logger)
            else:
                logger.debug("> username - {}".format(line))
                UserCommand._validate_username_or_throw(line)
            num_valid_lines += 1
        except Exception as exc:
            logger.info(_("importcsvsummary.error.line").format(
                printable_line, exc, ""))
            num_errors += 1
        line = csv_file.readline()
    if strict and num_errors > 0:
        Errors.exit_with_error(logger,
                               _("importcsvsummary.error.too_many_errors"))
    return num_valid_lines
def open(self, path, mode='rb'):
    """
    Access paths as a file-like object

    Parameters
    ----------
    path: str
        The path of the file to access
    mode: str
        The file mode for the opened file

    Returns
    -------
    file: BytesIO
        A BytesIO handle for the specified path, works like a file object
    """
    datafile = DataFile(path, BytesIO())
    if mode.startswith('r'):
        self._read(datafile)
    if not mode.endswith('b') and PY3:
        handle = TextIOWrapper(datafile.handle)
    else:
        handle = datafile.handle
    yield handle
    if mode.startswith('w'):
        handle.seek(0)
        self._write(datafile)
    datafile.handle.close()
def decode_bytes_from_file(the_file: TextIOWrapper, search_variable_name: str):
    search_variable_name = search_variable_name.strip()
    search_var_name = re.match(r'^(.*?)(?:_base(\d\d))?$', search_variable_name)
    var_base_name = str(search_var_name[1])
    encode_bases = ([str(search_var_name[2])]
                    if search_var_name.lastindex > 1
                    else ('64', '85', '32', '16'))
    saved_file_position = 0
    if the_file.seekable():
        saved_file_position = the_file.tell()
        the_file.seek(0)
    file_content = the_file.read()
    if the_file.seekable():
        the_file.seek(saved_file_position)
    for enc in encode_bases:
        reg_exp = (var_base_name + "_base" + str(enc)
                   + r"\s*=\s*[a-zA-Z]{0,2}'''(.*?)'''")
        var_found = re.match(reg_exp, file_content, re.DOTALL)
        if var_found:
            if hasattr(base64, 'b' + enc + 'decode'):
                decoded = getattr(base64, 'b' + enc + 'decode')(var_found[1])
                return var_base_name, bytes(decoded)
            return None, f'Variable found with unsupported encoding: base{enc}'
    return None, 'Variable not found'
def searchback(self, f: io.TextIOWrapper, dtarget: datetime):
    linetime = dtarget
    while linetime == dtarget:
        LogBisect.readback(f)
        saved = f.tell()
        match = self.regex.search(f.readline())
        linetime = parse(match.group())
    f.seek(saved, io.SEEK_SET)
def get_csv_line_count(csv_file: TextIOWrapper, header: bool) -> int:
    """ Get the number of features in the csv file """
    count = sum(1 for _ in csv.reader(csv_file))
    csv_file.seek(0)  # return the pointer to the first line for reuse
    return max(count - int(header), 0)
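# Hedged usage sketch for get_csv_line_count(); "points.csv" is a made-up
# file that is assumed to start with a header row.
import csv

with open("points.csv", newline="") as csv_file:
    n_features = get_csv_line_count(csv_file, header=True)
    # The cursor is back at offset 0, so the same handle can be parsed again.
    reader = csv.reader(csv_file)
    print(n_features, next(reader))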
from contextlib import contextmanager  # needed so the generator can back a "with" block


@contextmanager
def boomerang_stream(stream: TextIOWrapper) -> TextIOWrapper:
    """
    Yield a stream that goes back to the original offset after exiting the
    "with" context

    :param stream: The stream
    """
    current_offset = stream.tell()
    try:
        yield stream
    finally:
        stream.seek(current_offset)
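# Hedged usage sketch for boomerang_stream(); "data.txt" is a made-up file.
with open("data.txt") as fh:
    fh.readline()                      # advance somewhere into the file
    before = fh.tell()
    with boomerang_stream(fh) as s:
        s.seek(0)
        header = s.readline()          # peek at the first line
    assert fh.tell() == before         # offset restored on exit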
def fake_stdin(data):
    if PY2:
        stdin = tempfile.TemporaryFile()
    else:
        stdin = TextIOWrapper(tempfile.TemporaryFile())
    stdin.write(data)
    stdin.flush()
    stdin.seek(0)
    return stdin
def clean_expense_file(self):
    try:
        data = self.cleaned_data['expense_file']
        file = TextIOWrapper(data.file, encoding='ASCII')
        dialect = csv.Sniffer().sniff(file.read(1024))
        file.seek(0)
        return file
    except Exception:
        # Sniffing or decoding failed, so this is not a usable CSV upload.
        raise forms.ValidationError('Csv file required.')
def _run_command(self, command: str) -> str:
    stream = BytesIO()
    out = TextIOWrapper(stream)
    with patch("sys.stdout", out):
        self.cli.run_command(command, use_json=True, raise_exceptions=True)
    self.assertFalse(self.cli.engine.partial,
                     "Command was not terminated properly")
    out.seek(0)
    return out.read()
def bisect(self, f: io.TextIOWrapper, regex_src: str, dtarget: datetime) -> int:
    self.regex = re.compile(regex_src)
    self.dtarget = dtarget
    start = 0
    f.seek(0, io.SEEK_END)
    end = f.tell()
    self.mid_bisect(f, start, end)
    self.searchback(f, dtarget)
    return f.tell()
def import_csv(self, request): """ Function to bulk import user details from a CSV file """ if request.method == "POST": form = forms.BulkImportForm(request.POST, request.FILES) if not form.is_valid(): self.message_user(request, "Error: Invalid form", level=messages.ERROR) return self.render_bulk_import_form(request, form) try: csv_file = TextIOWrapper(form.cleaned_data['csv_file'], encoding=request.encoding) dialect = csv.Sniffer().sniff(csv_file.read()) csv_file.seek(0) reader = csv.DictReader(csv_file, dialect=dialect) except Exception as err: self.message_user(request, "Error: {}".format(err), level=messages.ERROR) return self.render_bulk_import_form(request, form) try: send_email = form.cleaned_data['send_email'] ignore_existing = form.cleaned_data['ignore_existing'] user_type = self.get_user_type(request) staff = self.is_user_staff() created_users = self.create_users( user_type, reader, staff, send_email, skip_existing=ignore_existing) except Exception as err: self.message_user( request, f"Error on row number {reader.line_num}: {err}", level=messages.ERROR) return self.render_bulk_import_form(request, form) else: created_users = [escape(x) for x in created_users] names = '<br/>'.join(created_users) self.message_user( request, mark_safe("{} users have been created:<br/>{}".format( len(created_users), names))) return redirect("..") else: return self.render_bulk_import_form(request, forms.BulkImportForm())
def get_lines_from_thing_file(thing_file: str):
    data_file = BytesIO()
    data_wrapper = TextIOWrapper(
        data_file,
        encoding="iso-8859-1",
        line_buffering=True,
    )
    file_path = "RETR " + THINGS_FOLDER_NAME_TITELIVE + "/" + thing_file
    connect_to_titelive_ftp().retrbinary(file_path, data_file.write)
    data_wrapper.seek(0, 0)
    return iter(data_wrapper.readlines())
class CSVReader(Reader):
    def __init__(self):
        Reader.__init__(self)
        self._file = None
        self._text_stream = None

    def open(self, path):
        self.set_total(os.stat(path).st_size)
        try:
            self._file = open(path, mode='rb')
            byts = self._file.read(4096)
            det = chardet.detect(byts)
            encoding = det['encoding']
            self._file.seek(0)
            if encoding == 'ascii':
                encoding = 'utf-8-sig'
            self._text_stream = TextIOWrapper(self._file, encoding=encoding,
                                              errors='replace')
            try:
                some_data = self._text_stream.read(131072)
                if len(some_data) == 131072:
                    # csv sniffer doesn't like partial lines
                    some_data = trim_after_last_newline(some_data)
                self._dialect = csv.Sniffer().sniff(some_data, ', \t;')
            except csv.Error as e:
                log.exception(e)
                self._dialect = csv.excel
                self._dialect.doublequote = True
        except Exception as e:
            if self._file:
                self._file.close()
            raise e

    def progress(self):
        return self._file.tell()

    def __iter__(self):
        self._text_stream.seek(0)
        reader = csv.reader(self._text_stream, self._dialect)
        return reader.__iter__()

    def close(self):
        try:
            self._file.close()
        except Exception:
            pass
def read_stream_with_progress(
    stream: TextIOWrapper, progress_label: str, length: int = None, reader=None
):
    length = length or sum(1 for _ in stream)
    reader = reader or stream
    stream.seek(0)
    click.secho(f"Found {length} lines")
    with click.progressbar(
        reader, length=length, label=progress_label
    ) as progress_reader:
        yield progress_reader
def matchCatcher(self, fileHandle: TextIOWrapper):
    try:
        pos = fileHandle.tell()
        text = fileHandle.readline()
        matcher = self.pattern.match(text)
        return matcher is not None
    finally:
        if self.catched:
            fileHandle.seek(pos)
def _detect_separator(textio: io.TextIOWrapper) -> str:
    """
    Detect most common char of '\t', ';', ',' in first MB

    TODO: Could be a tie or no counts at all, keep going until you find a winner
    """
    candidates = [',', ';', '\t']
    chunk = textio.read(settings.SEP_DETECT_CHUNK_SIZE)
    textio.seek(0)
    results = [chunk.count(x) for x in candidates]
    return candidates[results.index(max(results))]
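# Hedged usage sketch for _detect_separator(); "table.csv" is a made-up file,
# and settings.SEP_DETECT_CHUNK_SIZE is assumed to be configured elsewhere.
import csv

with open("table.csv", newline="") as textio:
    sep = _detect_separator(textio)                 # e.g. ';' for European exports
    rows = list(csv.reader(textio, delimiter=sep))  # cursor was rewound to 0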
def __init__(self, bucket, key, message=None, fieldnames=['Bucket', 'Key', 'Size']): self.fieldnames = fieldnames self.s3 = boto3.client('s3') self.message = message if message is not None: hb = Heartbeat( int(message.Queue().attributes.get('VisibilityTimeout', 30)) - 10, message.change_visibility, kwargs={ 'VisibilityTimeout': int(message.Queue().attributes.get('VisibilityTimeout', 30)) }) atexit.register(hb.cancel) hb.start() self.bucket = bucket self.key = key if key[-1:].isdigit(): self.name = os.path.basename(key) else: self.name = os.path.splitext(os.path.basename(key))[0] self.manifestcsv = tempfile.TemporaryFile() try: self.s3.download_fileobj(self.bucket, self.key, self.manifestcsv) except botocore.exceptions.ClientError as e: logger.error( "ERROR: Failed to download manifest: s3://{}/{}".format( self.bucket, self.key)) logger.debug("Exception: %s", e, exc_info=True) sys.exit(5) self.manifestcsv.seek(0) TextIOmanifestcsv = TextIOWrapper(self.manifestcsv) try: self.manifestreader = csv.DictReader(TextIOmanifestcsv, fieldnames=fieldnames) except csv.Error as e: logger.error("ERROR: Failed to open manifest: s3://{}/{}".format( self.bucket, self.key)) logger.debug("Exception: %s", e, exc_info=True) sys.exit(3) firstline = self.manifestreader.__next__() self.sourcebucket = firstline['Bucket'] TextIOmanifestcsv.seek(0) logger.error("begin processing s3://{}/{}".format( self.bucket, self.key))
def _read_lines(self, f: io.TextIOWrapper) -> None:
    while True:
        self._state.offset = f.tell()
        line = f.readline()
        if line:
            line = line.strip()
            self._handle_string(line)
        else:
            self._stop_if_file_was_deleted_or_recreated()
            self._sleep_and_maybe_stop()
            f.seek(self._state.offset)
def test_io_wrapper(self):
    content = "vive l'été\n"
    with tempfile.TemporaryFile() as temp, File(temp, name='something.txt') as test_file:
        test_file.write(content.encode())
        test_file.seek(0)
        wrapper = TextIOWrapper(test_file, 'utf-8', newline='\n')
        self.assertEqual(wrapper.read(), content)
        wrapper.write(content)
        wrapper.seek(0)
        self.assertEqual(wrapper.read(), content * 2)
        test_file = wrapper.detach()
        test_file.seek(0)
        self.assertEqual(test_file.read(), (content * 2).encode())
def add_song_to_archive(stream: TextIOWrapper, youtube_link: str) -> None:
    """
    Add the passed song to the archive file.

    This method is supposed to be called when the song is considered
    `worked on` and not just when the song is just downloaded.
    """
    video_id: str = extract_video_id(youtube_link)
    # Go to the end of the file
    stream.seek(0, 2)
    stream.write(f"\n{video_id}")
def test_get_file_size_text_file():
    from io import TextIOWrapper
    test_inner_file = BytesIO()
    test_file = TextIOWrapper(test_inner_file, encoding="utf-8")
    test_file.write(u"\u0001F3A9 " * 123)
    test_file.seek(0)
    # read 9 *unicode chars* to advance fd to somewhere interesting
    test_file.read(9)
    previous_pos = test_file.tell()
    assert get_file_size(test_file) == 738
    assert test_file.tell() == previous_pos
def read_options(f: TextIOWrapper) -> Dict[str, str]:
    next_option_line = f.tell()
    options: Dict[str, str] = {}
    next_line = f.readline()
    while next_line.startswith("#"):
        option_match = re.match("# (.*): (.*)", next_line)
        assert option_match
        key, value = option_match.group(1, 2)
        options[key] = value
        next_option_line = f.tell()
        next_line = f.readline()
    f.seek(next_option_line)
    return options
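# Hedged usage sketch for read_options(); "results.txt" is a made-up file
# whose leading "# key: value" comment lines form the options block, e.g.:
#
#   # solver: cvc4
#   # timeout: 30
#   first data line ...
with open("results.txt") as f:
    opts = read_options(f)          # {'solver': 'cvc4', 'timeout': '30'}
    first_data_line = f.readline()  # cursor sits on the first non-"#" line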
def import_csv(self, request): """ Function to bulk import user details from a CSV file """ if request.method == "POST": try: csv_file = TextIOWrapper( request.FILES['csv_file'].file, encoding=request.encoding ) dialect = csv.Sniffer().sniff(csv_file.read()) csv_file.seek(0) reader = csv.DictReader(csv_file, dialect=dialect) except Exception as err: self.message_user(request, "Error: {}".format(err)) return redirect("..") try: # This flag sets "is_staff" boolean staff = False if '/student/' in request.path: user_type = models.Student elif '/faculty/' in request.path: user_type = models.Faculty elif '/labassistant/' in request.path: user_type = models.LabAssistant staff = True else: raise Http404 # If send_email is true, then the new user will receive email send_email = False if request.POST.get( 'send_email') == "No" else True self.create_users(user_type, reader, staff, send_email) except Exception as err: messages.error( request, f'Error on row number {reader.line_num}: {err}') else: messages.success(request, "Your csv file has been imported") return redirect("..") else: form = forms.BulkImportForm() payload = {"form": form} return render( request, "admin/bulk_import_form.html", payload )
def test_io_wrapper(self):
    content = "vive l'été\n"
    with tempfile.TemporaryFile() as temp, File(temp, name='something.txt') as test_file:
        test_file.write(content.encode('utf-8'))
        test_file.seek(0)
        wrapper = TextIOWrapper(test_file, 'utf-8', newline='\n')
        self.assertEqual(wrapper.read(), content)
        # The following seek() call is required on Windows Python 2 when
        # switching from reading to writing.
        wrapper.seek(0, 2)
        wrapper.write(content)
        wrapper.seek(0)
        self.assertEqual(wrapper.read(), content * 2)
        test_file = wrapper.detach()
        test_file.seek(0)
        self.assertEqual(test_file.read(), (content * 2).encode('utf-8'))
def redirect(*params, **kwargs):
    if sys.version_info[0] == 3:
        captured_stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding)
    else:
        captured_stdout = BytesIO()
    old_stdout = sys.stdout
    sys.stdout = captured_stdout
    try:
        ret_val = f(*params, **kwargs)
    except Exception as e:
        raise YasufRuntimeException(repr(e))
    finally:
        # Always restore the real stdout, even when the wrapped call fails.
        sys.stdout = old_stdout
    captured_stdout.seek(0)
    return captured_stdout, ret_val
class LuteusOP(OptionParser): def __init__(self, *args, **kwargs): from io import BytesIO, TextIOWrapper self.file_b = BytesIO() self.file_t = TextIOWrapper(self.file_b) OptionParser.__init__(self, *args, **kwargs) def exit(self, status=0, msg=None, *args, **kwargs): if (msg): f = self.file_t f.write(msg) f.flush() raise LuteusOPBailout() def _pick_file(self, file): return self.file_t def print_usage(self, f=None): f = self._pick_file(f) OptionParser.print_usage(self, f) f.flush() def print_help(self, f=None): f = self._pick_file(f) OptionParser.print_help(self, f) f.flush() def print_version(self, f=None): f = self._pick_file(f) OptionParser.print_version(self, self._pick_file(f)) f.flush() def output_lines(self, output): """Write lines cached output by calling output(line) for each line.""" out_val = self.file_b.getvalue() if not (out_val): return out_lines = out_val.strip(b'\n').split(b'\n') for line in out_lines: output(line, width=None) def clear_output(self): """Discard cached output.""" self.file_b.seek(0) self.file_b.truncate() self.file_t.seek(0) self.file_t.truncate()
def make_repo(self, revs): dump = BytesIO() dump_message(dump, (("SVN-fs-dump-format-version", "2"),)) dump_message(dump, ( ("UUID", "00000000-0000-0000-0000-000000000000"),)) for (i, rev) in enumerate(revs, 1): props = { "svn:date": "1970-01-01T00:00:00.000000Z", "svn:log": "", } props.update(rev.setdefault("props", dict())) headers = (("Revision-number", format(i)),) dump_message(dump, headers, props=props) for node in rev.setdefault("nodes", {}): headers = list() for name in ( "action", "kind", "path", "copyfrom-path", "copyfrom-rev", ): value = node.get(name.replace("-", "_")) if value is not None: headers.append(("Node-" + name, format(value))) dump_message(dump, headers, props=node.get("props"), content=node.get("content")) dump.seek(0) log = TextIOWrapper(BytesIO(), "ascii") log.write("<log>") for [i, rev] in enumerate(reversed(revs)): i = format(len(revs) - i) log.write(f"<logentry revision={saxutils.quoteattr(i)}>") author = rev["props"].get("svn:author") if author is not None: log.write(f"<author>{saxutils.escape(author)}</author>") log.write("<date>1970-01-01T00:00:00.000000Z</date><paths>") for node in rev["nodes"]: action = {"add": "A", "change": "M", "delete": "D"}[node['action']] log.write(f"<path action={saxutils.quoteattr(action)}>/{saxutils.escape(node['path'])}</path>") log.write("</paths></logentry>") log.write("</log>") log.seek(0) return (dump, patch("svnex.stdin", log))
def fix_headerguard(filename):
    supposed = get_guard_name(filename)
    with open(filename, "r", encoding='utf-8', errors='ignore') as f:
        inlines = f.readlines()
    tmp = TextIOWrapper(BytesIO(), encoding="utf-8", errors="ignore")
    tmp.seek(0)
    guard_found = 0
    guard_name = ""
    ifstack = 0
    for line in inlines:
        if guard_found == 0:
            if line.startswith("#ifndef"):
                guard_found += 1
                guard_name = line[8:].rstrip()
                line = "#ifndef %s\n" % (supposed)
        elif guard_found == 1:
            if line.startswith("#define") and line[8:].rstrip() == guard_name:
                line = "#define %s\n" % (supposed)
                guard_found += 1
            else:
                break
        elif guard_found == 2:
            if line.startswith("#if"):
                ifstack += 1
            elif line.startswith("#endif"):
                if ifstack > 0:
                    ifstack -= 1
                else:
                    guard_found += 1
                    line = "#endif /* %s */\n" % supposed
        tmp.write(line)
    tmp.seek(0)
    if guard_found == 3:
        for line in difflib.unified_diff(inlines, tmp.readlines(),
                                         "%s" % filename, "%s" % filename):
            sys.stdout.write(line)
    else:
        print("%s: no / broken header guard" % filename, file=sys.stderr)
        return False
def get_reader(self):
    f = self.files['file']
    if f.name.endswith(".xlsx"):
        return namedtuple_xlsx_reader(f)
    enc = self.cleaned_data['encoding']
    if enc.lower() in ('', 'autodetect'):
        enc = chardet.detect(f.read(1024))["encoding"]
        log.info("Guessed encoding: {enc}".format(**locals()))
        f.seek(0)
    f = TextIOWrapper(f.file, encoding=enc)
    d = self.cleaned_data['dialect'] or 'autodetect'
    if d == 'autodetect':
        dialect = csv.Sniffer().sniff(f.readline())
        f.seek(0)
        if dialect.delimiter not in "\t,;":
            dialect = csv.get_dialect('excel')
    else:
        dialect = csv.get_dialect(d)
    return namedtuple_csv_reader(f, dialect=dialect)
class MultiPageTextImporter: def __init__(self, mainControl): """ mainControl -- Currently PersonalWikiFrame object """ self.mainControl = mainControl def getImportTypes(self, guiparent): """ Return sequence of tuples with the description of import types provided by this object. A tuple has the form (<imp. type>, <human readable description>, <panel for add. options or None>) If panels for additional options must be created, they should use guiparent as parent """ if guiparent: res = wx.xrc.XmlResource.Get() mptPanel = res.LoadPanel(guiparent, "ImportSubMultipageText") # ctrls = XrcControls(htmlPanel) # config = self.mainControl.getConfig() # # ctrls.cbPicsAsLinks.SetValue(config.getboolean("main", # "html_export_pics_as_links")) # ctrls.chTableOfContents.SetSelection(config.getint("main", # "export_table_of_contents")) # ctrls.tfHtmlTocTitle.SetValue(config.get("main", # "html_toc_title")) else: mptPanel = None return ( ("multipage_text", _("Multipage text"), mptPanel), ) def getImportSourceWildcards(self, importType): """ If an export type is intended to go to a file, this function returns a (possibly empty) sequence of tuples (wildcard description, wildcard filepattern). If an export type goes to a directory, None is returned """ if importType == "multipage_text": return ((_("Multipage files (*.mpt)"), "*.mpt"), (_("Text file (*.txt)"), "*.txt")) return None def getAddOptVersion(self): """ Returns the version of the additional options information returned by getAddOpt(). If the return value is -1, the version info can't be stored between application sessions. Otherwise, the addopt information can be stored between sessions and can later handled back to the doImport method of the object without previously showing the import dialog. """ return 0 def getAddOpt(self, addoptpanel): """ Reads additional options from panel addoptpanel. If getAddOptVersion() > -1, the return value must be a sequence of simple string, unicode and/or numeric objects. Otherwise, any object can be returned (normally the addoptpanel itself) """ if addoptpanel is None: return (0,) else: ctrls = XrcControls(addoptpanel) showImportTableAlways = boolToInt(ctrls.cbShowImportTableAlways.GetValue()) return (showImportTableAlways,) def _collectContent(self): """ Collect lines from current position of importFile up to separator or file end collect all lines and return them as list of lines. """ content = [] while True: # Read lines of wikiword line = self.importFile.readline() if line == "": # The last page in mpt file without separator # ends as the real wiki page # content = u"".join(content) break if line == self.separator: if len(content) > 0: # Iff last line of mpt page is empty, the original # page ended with a newline, so remove last # character (=newline) content[-1] = content[-1][:-1] # content = u"".join(content) break content.append(line) return "".join(content) def _skipContent(self): """ Skip content until reaching next separator or end of file """ while True: # Read lines of wikiword line = self.importFile.readline() if line == "": # The last page in mpt file without separator # ends as the real wiki page break if line == self.separator: break def doImport(self, wikiDocument, importType, importSrc, compatFilenames, addOpt, importData=None): """ Run import operation. wikiDocument -- WikiDocument object importType -- string tag to identify how to import importSrc -- Path to source directory or file to import from compatFilenames -- Should the filenames be decoded from the lowest level compatible? 
addOpt -- additional options returned by getAddOpt() importData -- if not None contains data to import as bytestring. importSrc is ignored in this case. Needed for trashcan. returns True if import was done (needed for trashcan) """ if importData is not None: self.rawImportFile = BytesIO(importData) # TODO bytes or string??? else: try: self.rawImportFile = open(pathEnc(importSrc), "rb") except IOError: raise ImportException(_("Opening import file failed")) self.wikiDocument = wikiDocument self.tempDb = None showImportTableAlways = addOpt[0] # wikiData = self.wikiDocument.getWikiData() # TODO Do not stop on each import error, instead create error list and # continue try: try: # Wrap input file to convert format bom = self.rawImportFile.read(len(BOM_UTF8)) if bom != BOM_UTF8: self.rawImportFile.seek(0) self.importFile = TextIOWrapper(self.rawImportFile, MBCS_ENCODING, "replace") else: self.importFile = TextIOWrapper(self.rawImportFile, "utf-8", "replace") line = self.importFile.readline() if line.startswith("#!"): # Skip initial line with #! to allow execution as shell script line = self.importFile.readline() if not line.startswith("Multipage text format "): raise ImportException( _("Bad file format, header not detected")) # Following in the format identifier line is a version number # of the file format self.formatVer = int(line[22:-1]) if self.formatVer > 1: raise ImportException( _("File format number %i is not supported") % self.formatVer) # Next is the separator line line = self.importFile.readline() if not line.startswith("Separator: "): raise ImportException( _("Bad file format, header not detected")) self.separator = line[11:] startPos = self.importFile.tell() if self.formatVer == 0: self._doImportVer0() elif self.formatVer == 1: # Create temporary database. It is mainly filled during # pass 1 to check for validity and other things before # actual importing in pass 2 # TODO Respect settings for general temp location!!! self.tempDb = ConnectWrapSyncCommit(sqlite3.connect("")) try: # TODO: Remove column "collisionWithPresent", seems to be unused self.tempDb.execSql("create table entries(" "unifName text primary key not null, " # Unified name in import file "seen integer not null default 0, " # data really exists "dontImport integer not null default 0, " # don't import this (set between pass 1 and 2) "missingDep integer not null default 0, " # missing dependency(ies) "importVersionData integer not null default 0, " # versioning data present # "neededBy text default ''," # "versionContentDifferencing text default ''," "collisionWithPresent text not null default ''," # Unif. name of present entry which collides with imported one (if any) "renameImportTo text not null default ''," # Rename imported element to (if at all) "renamePresentTo text not null default ''" # Rename present element in database to (if at all) ");" ) # Dependencies. 
If unifName isn't imported (or faulty), neededBy shouldn't be either self.tempDb.execSql("create table depgraph(" "unifName text not null default ''," "neededBy text not null default ''," "constraint depgraphpk primary key (unifName, neededBy)" ");" ) # Recursive processing is not supported for this table self.tempDb.execSql("create table renamegraph(" "unifName text not null default ''," "dependent text not null default ''," "constraint renamegraphpk primary key (unifName, dependent)," "constraint renamegraphsingledep unique (dependent)" ");" ) # Collect some initial information into the temporary database self._doImportVer1Pass1() # Draw some logical conclusions on the temp db self._markMissingDependencies() self._markHasVersionData() self._markCollision() # Now ask user if necessary if showImportTableAlways or self._isUserNeeded(): if not self._doUserDecision(): # Canceled by user return False # Further logical processing after possible user editing self._markNonImportedVersionsData() self._markNonImportedDependencies() self._propagateRenames() # TODO: Remove version data without ver. overview or main data # Back to start of import file and import according to settings # in temp db self.importFile.seek(startPos) self._doImportVer1Pass2() return True finally: self.tempDb.close() self.tempDb = None except ImportException: raise except Exception as e: traceback.print_exc() raise ImportException(str(e)) finally: self.importFile.close() def _markMissingDependencies(self): """ If a datablock wasn't present, all dependent data blocks are marked as not to import """ while True: self.tempDb.execSql(""" update entries set missingDep=1, dontImport=1 where (not missingDep) and unifName in (select depgraph.neededBy from depgraph inner join entries on depgraph.unifName = entries.unifName where (not entries.seen) or entries.missingDep); """) if self.tempDb.rowcount == 0: break def _markHasVersionData(self): """ Mark if version data present """ self.tempDb.execSql(""" update entries set importVersionData=1 where (not importVersionData) and unifName in (select substr(unifName, 21) from entries where unifName glob 'versioning/overview/*' and not dontImport) """) # TODO Take missing deps into account here? # self.tempDb.execSql("insert or replace into entries(unifName, importVersionData) " # "values (?, 1)", (depunifName,)) def _markCollision(self): """ Mark collisions between existing and data blocks and such to import """ # First find collisions with wiki words for wikipageUnifName in self.tempDb.execSqlQuerySingleColumn( "select unifName from entries where unifName glob 'wikipage/*' " "and not dontImport"): wpName = wikipageUnifName[9:] if not self.wikiDocument.isDefinedWikiPageName(wpName): continue self.tempDb.execSql("update entries set collisionWithPresent = ? " "where unifName = ?", (wikipageUnifName, wikipageUnifName)) # (u"wikipage/" + collisionWithPresent, wikipageUnifName)) # Then find other collisions (saved searches etc.) for unifName in self.tempDb.execSqlQuerySingleColumn( "select unifName from entries where (unifName glob 'savedsearch/*' " "or unifName glob 'savedpagesearch/*') and not dontImport"): if self.wikiDocument.hasDataBlock(unifName): self.tempDb.execSql("update entries set collisionWithPresent = ? " "where unifName = ?", (unifName, unifName)) def _markNonImportedVersionsData(self): """ After user dialog: If importVersionData is false for some entries the depending version data shouldn't be imported. Only the versioning overview is marked for not importing. 
The next step propagates this to the other data blocks """ self.tempDb.execSql(""" update entries set dontImport = 1 where unifName in (select 'versioning/overview/' || unifName from entries where not importVersionData) """) # # Vice versa the importVersionData column must be updated if # self.tempDb.execSql(""" # update entries set importVersionData = 0 where importVersionData # and ('versioning/overview/' || unifName) in (select unifName # from entries where dontImport) # """) def _markNonImportedDependencies(self): """ After user dialog: If some data blocks where chosen not to import mark all dependent blocks to not import also (especially version data) """ while True: self.tempDb.execSql(""" update entries set dontImport=1 where (not dontImport) and unifName in (select depgraph.neededBy from depgraph inner join entries on depgraph.unifName = entries.unifName where entries.dontImport); """) if self.tempDb.rowcount == 0: break def _propagateRenames(self): """ Write rename commands for imported items to all parts to import if some parts need renaming. Renaming of present items is not propagated. """ for unifName, renImportTo in self.tempDb.execSqlQuery( "select unifName, renameImportTo from entries " "where renameImportTo != '' and not dontImport"): for depUnifName in self.tempDb.execSqlQuerySingleColumn( "select dependent from renamegraph where unifName = ? and " "dependent in (select unifName from entries where " "not dontImport)", (unifName,)): if depUnifName.endswith(unifName): newName = depUnifName[:-len(unifName)] + renImportTo self.tempDb.execSql(""" update entries set renameImportTo=? where unifName = ? """, (newName, depUnifName)) def _doUserDecision(self): """ Called to present GUI to user for deciding what to do. This method is overwritten for trashcan GUI. Returns False if user canceled operation """ return MultiPageTextImporterDialog.runModal( self.mainControl, self.tempDb, self.mainControl) def _isUserNeeded(self): """ Decide if a dialog must be shown to ask user how to proceed. Under some circumstances the dialog may be shown regardless of the result. """ if self.tempDb.execSqlQuerySingleItem("select missingDep from entries " "where missingDep limit 1", default=False): # Missing dependency return True if len(self.tempDb.execSqlQuerySingleItem("select collisionWithPresent " "from entries where collisionWithPresent != '' limit 1", default="")) > 0: # Name collision return True # No problems found return False def _doImportVer0(self): """ Import wikiwords if format version is 0. 
""" langHelper = wx.GetApp().createWikiLanguageHelper( self.wikiDocument.getWikiDefaultWikiLanguage()) while True: # Read next wikiword line = self.importFile.readline() if line == "": break wikiWord = line[:-1] errMsg = langHelper.checkForInvalidWikiWord(wikiWord, self.wikiDocument) if errMsg: raise ImportException(_("Bad wiki word: %s, %s") % (wikiWord, errMsg)) content = self._collectContent() page = self.wikiDocument.getWikiPageNoError(wikiWord) page.replaceLiveText(content) def _doImportVer1Pass1(self): while True: tag = self.importFile.readline() if tag == "": # End of file break tag = tag[:-1] if tag.startswith("funcpage/"): self._skipContent() elif tag.startswith("savedsearch/"): self._skipContent() elif tag.startswith("savedpagesearch/"): self._skipContent() elif tag.startswith("wikipage/"): self._skipContent() elif tag.startswith("versioning/overview/"): self._doImportItemVersioningOverviewVer1Pass1(tag[20:]) elif tag.startswith("versioning/packet/versionNo/"): self._skipContent() else: # Unknown tag -> Ignore until separator self._skipContent() continue self.tempDb.execSql("insert or replace into entries(unifName, seen) " "values (?, 1)", (tag,)) def _readHintedDatablockVer1(self): """ Reads datablock and preprocesses encoding if necessary. Returns either (hintStrings, content) or (None, None) if either an unknown important hint was found or if encoding had an error. hintStrings is a list of hints (as unistrings) which were not processed by the function (therefore encoding hint is removed). content can be a bytestring or a unistring. If (None, None) is returned, the remaining content of the entry was skipped already by the function. """ hintLine = self.importFile.readline()[:-1] hintStrings = hintLine.split(" ") resultHintStrings = [] # Set default useB64 = False # Process hints for hint in hintStrings: if hint.startswith("important/encoding/"): if hint[19:] == "text": useB64 = False elif hint[19:] == "base64": useB64 = True else: # Unknown encoding: don't read further self._skipContent() return None, None elif hint.startswith("important/"): # There is something important we do not understand self._skipContent() return None, None else: resultHintStrings.append(hint) content = self._collectContent() if useB64: try: content = base64BlockDecode(content) except TypeError: # base64 decoding failed self._skipContent() return None, None return (resultHintStrings, content) def _doImportItemVersioningOverviewVer1Pass1(self, subtag): hintStrings, content = self._readHintedDatablockVer1() if content is None: return # Always encode to UTF-8 no matter what the import file encoding is content = content.encode("utf-8") try: ovw = Versioning.VersionOverview(self.wikiDocument, unifiedBasePageName=subtag) ovw.readOverviewFromBytes(content) ovwUnifName = ovw.getUnifiedName() self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) " "values (?, ?)", (subtag, ovwUnifName)) self.tempDb.execSql("insert or replace into renamegraph(unifName, dependent) " "values (?, ?)", (subtag, ovwUnifName)) for depUnifName in ovw.getDependentDataBlocks(omitSelf=True): # Mutual dependency between version overview and each version packet self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) " "values (?, ?)", (depUnifName, ovwUnifName)) self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) " "values (?, ?)", (ovwUnifName, depUnifName)) self.tempDb.execSql("insert or replace into renamegraph(unifName, dependent) " "values (?, ?)", (subtag, depUnifName)) # 
self.tempDb.execSql("insert or replace into entries(unifName, needed) " # "values (?, 1)", (depUnifName,)) except VersioningException: return def _doImportVer1Pass2(self): wikiDoc = self.wikiDocument # We have to rename present items # First wikipages because this automatically renames depending version data for pageFrom, pageTo in self.tempDb.execSqlQuery( """ select substr(unifName, 10), substr(renamePresentTo, 10) from entries where unifName glob 'wikipage/*' and renamePresentTo glob 'wikipage/*' """): if wikiDoc.isDefinedWikiPageName(pageFrom): wikiDoc.renameWikiWords({pageFrom: pageTo}, Consts.ModifyText.off) # TODO How to handle rename of home page? # Then remaining data blocks for oldUnifName, newUnifName in self.tempDb.execSqlQuery( """ select unifName, renamePresentTo from entries where unifName not glob 'wikipage/*' and renamePresentTo != '' """): wikiDoc.renameDataBlock(oldUnifName, newUnifName) # For wiki pages with versions to import, existing versions must be # deleted for wikiWord in self.tempDb.execSqlQuerySingleColumn( """ select substr(unifName, 10) from entries where unifName glob 'wikipage/*' and renameImportTo == '' and not dontImport and importVersionData union select substr(renameImportTo, 10) from entries where unifName glob 'wikipage/*' and renameImportTo glob 'wikipage/*' and not dontImport and importVersionData """): if not wikiDoc.isDefinedWikiPageName(wikiWord): continue page = wikiDoc.getWikiPage(wikiWord) versionOverview = page.getExistingVersionOverview() if versionOverview is not None: versionOverview.delete() while True: tag = self.importFile.readline() if tag == "": # End of file break tag = tag[:-1] # Remove line end try: dontImport, renameImportTo = \ self.tempDb.execSqlQuery( "select dontImport, renameImportTo from " "entries where unifName = ?", (tag,))[0] except IndexError: # Maybe dangerous traceback.print_exc() self._skipContent() continue if dontImport: self._skipContent() continue if renameImportTo == "": renameImportTo = tag if tag.startswith("wikipage/"): self._importItemWikiPageVer1Pass2(renameImportTo[9:]) elif tag.startswith("funcpage/"): self._importItemFuncPageVer1Pass2(tag[9:]) elif tag.startswith("savedsearch/"): self._importB64DatablockVer1Pass2(renameImportTo) elif tag.startswith("savedpagesearch/"): self._importHintedDatablockVer1Pass2(renameImportTo) elif tag.startswith("versioning/"): self._importHintedDatablockVer1Pass2(renameImportTo) else: # Unknown tag -> Ignore until separator self._skipContent() for wikiWord in self.tempDb.execSqlQuerySingleColumn( """ select substr(unifName, 10) from entries where unifName glob 'wikipage/*' and renamePresentTo == '' and importVersionData union select substr(renamePresentTo, 10) from entries where unifName glob 'wikipage/*' and renamePresentTo glob 'wikipage/*' and importVersionData """): if not wikiDoc.isDefinedWikiPageName(wikiWord): continue page = wikiDoc.getWikiPage(wikiWord) versionOverview = page.getExistingVersionOverview() if versionOverview is not None: versionOverview.readOverview() def _importItemWikiPageVer1Pass2(self, wikiWord): timeStampLine = self.importFile.readline()[:-1] timeStrings = timeStampLine.split(" ") if len(timeStrings) < 3: traceback.print_exc() self._skipContent() return # TODO Report error timeStrings = timeStrings[:3] try: timeStrings = [str(ts) for ts in timeStrings] except UnicodeEncodeError: traceback.print_exc() self._skipContent() return # TODO Report error try: timeStamps = [timegm(time.strptime(ts, "%Y-%m-%d/%H:%M:%S")) for ts in timeStrings] except 
(ValueError, OverflowError): traceback.print_exc() self._skipContent() return # TODO Report error content = self._collectContent() page = self.wikiDocument.getWikiPageNoError(wikiWord) # TODO How to handle versions here? page.replaceLiveText(content) if page.getTxtEditor() is not None: page.writeToDatabase() page.setTimestamps(timeStamps) def _importItemFuncPageVer1Pass2(self, subtag): # The subtag is functional page tag try: # subtag is unicode but func tags are bytestrings subtag = str(subtag) except UnicodeEncodeError: self._skipContent() return content = self._collectContent() try: page = self.wikiDocument.getFuncPage(subtag) page.replaceLiveText(content) except BadFuncPageTagException: # This function tag is bad or unknown -> ignore return # TODO Report error def _importB64DatablockVer1Pass2(self, unifName): # Content is base64 encoded b64Content = self._collectContent() try: datablock = base64BlockDecode(b64Content) self.wikiDocument.getWikiData().storeDataBlock(unifName, datablock, storeHint=Consts.DATABLOCK_STOREHINT_INTERN) except TypeError: # base64 decoding failed return # TODO Report error def _importTextDatablockVer1Pass2(self, unifName): content = self._collectContent() try: self.wikiDocument.getWikiData().storeDataBlock(unifName, content, storeHint=Consts.DATABLOCK_STOREHINT_INTERN) except TypeError: return # TODO Report error def _importHintedDatablockVer1Pass2(self, unifName): """ A hinted datablock starts with an extra line defining encoding (text or B64) and storage hint. It was introduced later therefore only versioning packets use this while saved searches don't. """ hintStrings, content = self._readHintedDatablockVer1() if hintStrings is None: return # Set defaults storeHint = Consts.DATABLOCK_STOREHINT_INTERN # Process hints for hint in hintStrings: if hint.startswith("storeHint/"): if hint[10:] == "extern": storeHint = Consts.DATABLOCK_STOREHINT_EXTERN elif hint[10:] == "intern": storeHint = Consts.DATABLOCK_STOREHINT_INTERN # No else. It is not vital to get the right storage hint try: if isinstance(content, str): content = BOM_UTF8 + content.encode("utf-8") self.wikiDocument.getWikiData().storeDataBlock(unifName, content, storeHint=storeHint) except TypeError: traceback.print_exc() return # TODO Report error
class Map: # reads specified csv file def __init__(self, request): self.request = request #self.rows = DictReader(TextIOWrapper(self.request.FILES['file'].file, encoding='UTF-8')) self.f = TextIOWrapper(self.request.FILES['file'].file, encoding='UTF-8') # look for morphdata file type and associated preheader lines to skip saw_morphdata = 0 lines_to_skip = 0 self.rows = reader(self.f, delimiter=',') for row in self.rows: if row[0] == '1.0 \\alpha': saw_morphdata = 1 lines_to_skip = 1 else: if saw_morphdata == 1: if row[0] == 'Class Status': break lines_to_skip = lines_to_skip + 1 else: break self.f.seek(0) # rewind the file if saw_morphdata == 1: # skip preheader lines if morphdata while lines_to_skip > 0: next(self.f) # read next line in file lines_to_skip = lines_to_skip - 1 self.rows = DictReader(self.f) # from the command line, ingests the all.csv file and processes the contained list of files def all_csv(self, dev=None): module_dir = os.path.dirname(__file__) # get current directory #example before sub: module_dir = '/Users/djh/wd/krasnow/csv2db/lib' module_dir = re.sub(r'csv2db/lib', r'static/csv2db/dat', module_dir) #example after sub : module_dir = '/Users/djh/wd/krasnow/static/csv2db/dat' if dev is None: all_csv_filename = 'all.csv' all_csv_file_path = os.path.join(module_dir, all_csv_filename) all_csv_file_buffer = open(all_csv_file_path, 'rb') self.rows = DictReader(TextIOWrapper(all_csv_file_buffer, encoding='UTF-8')) self.stdout.write('%s begin... %s' % (dt.now(), all_csv_file_path)) Map.all_to_all(self) self.stdout.write('%s .....end %s' % (dt.now(), all_csv_file_path)) elif (dev == 'false') or (dev == 'true'): type_csv_filename = 'type.csv' type_csv_file_path = os.path.join(module_dir, type_csv_filename) type_csv_file_buffer = open(type_csv_file_path, 'rb') self.rows = DictReader(TextIOWrapper(type_csv_file_buffer, encoding='UTF-8')) self.stdout.write('%s begin... %s' % (dt.now(), type_csv_file_path)) Map.type_to_type(self, dev) self.stdout.write('%s .....end %s' % (dt.now(), type_csv_file_path)) elif dev == 'term': term_csv_filename = 'term.csv' term_csv_file_path = os.path.join(module_dir, term_csv_filename) term_csv_file_buffer = open(term_csv_file_path, 'rb') self.rows = DictReader(TextIOWrapper(term_csv_file_buffer, encoding='UTF-8')) self.stdout.write('%s begin... 
%s' % (dt.now(), term_csv_file_path)) Map.term_to_term(self) self.stdout.write('%s .....end %s' % (dt.now(), term_csv_file_path)) else: pass # ingests the all.csv file and processes the contained list of files def all_to_all(self): process_order = [] csv_filenames = [] module_dir = os.path.dirname(__file__) # get current directory #example before sub: module_dir = '/Users/djh/wd/krasnow/csv2db/lib' module_dir = re.sub(r'csv2db/lib', r'static/csv2db/dat', module_dir) #example after sub : module_dir = '/Users/djh/wd/krasnow/static/csv2db/dat' for row in self.rows: process_order.append(row['process order']) csv_filenames.append(row['csv filename']) for order, csv_filename in zip(process_order, csv_filenames): csv_file_path = os.path.join(module_dir, csv_filename) csv_file_buffer = open(csv_file_path, 'rb') #self.rows = DictReader(TextIOWrapper(csv_file_buffer, encoding='UTF-8')) self.f = TextIOWrapper(csv_file_buffer, encoding='UTF-8') # look for morphdata file type and associated preheader lines to skip saw_morphdata = 0 lines_to_skip = 0 self.rows = reader(self.f, delimiter=',') for row in self.rows: if row[0] == '1.0 \\alpha': saw_morphdata = 1 lines_to_skip = 1 else: if saw_morphdata == 1: if row[0] == 'Class Status': break lines_to_skip = lines_to_skip + 1 else: break self.f.seek(0) # rewind the file if saw_morphdata == 1: # skip preheader lines if morphdata while lines_to_skip > 0: next(self.f) # read next line in file lines_to_skip = lines_to_skip - 1 #skip 3 lines for markerdata if csv_filename=='markerdata.csv': lines_to_skip_marker=3 while lines_to_skip_marker > 0: next(self.f) # read next line in file lines_to_skip_marker = lines_to_skip_marker - 1 self.rows = DictReader(self.f) #material_method if csv_filename=='material_method.csv': self.rows = reader(self.f, delimiter=',') try: self.stdout.write('%s begin... 
[%02s] %s' % (dt.now(), order, csv_file_path)) except AttributeError: pass if order == '1': #dev = 'true' dev = 'false' Map.type_to_type(self, dev) elif order == '2': Map.notes_to_type(self) elif order == '3': Map.connection_to_connection(self) elif order == '4': Map.synonym_to_synonym(self) elif order == '5': Map.article_to_article(self) elif order == '6': Map.attachment_to_attachment(self) elif order == '7': Map.attachment_to_attachment(self) elif order == '8': Map.attachment_to_attachment(self) elif order == '9': FiringPatternStringField.attachment_fp_to_attachment_fp(self) elif order == '10': Map.fragment_to_fragment(self) elif order == '11': Map.fragment_to_fragment(self) elif order == '12': Map.fragment_to_fragment(self) elif order == '13': FiringPatternStringField.fp_fragment_to_fp_fragment(self) elif order == '14': Map.markerdata_to_markerdata(self) elif order == '15': Map.epdata_to_epdata(self) elif order == '16': Map.morphdata_to_morphdata(self) elif order == '17': FiringPatternStringField.definition_to_definition(self) elif order == '18': FiringPatternStringField.parameters_to_parameters(self) elif order == '19': FiringPatternStringField.materials_to_method(self) elif order == '20': Map.connfragment_to_connfragment(self) elif order == '21': Map.conndata_to_conndata(self) elif order == '22': Map.term_to_term(self) elif order == '23': Map.onhold_to_onhold(self) else: pass try: self.stdout.write('%s .....end [%02s] %s' % (dt.now(), order, csv_file_path)) except AttributeError: pass csv_file_buffer.close() # ingests article.csv and populates Article, ArticleAuthorRel, Author def article_to_article(self): # and article_to_author pmid_isbn_reads = [] first_page_reads = [] name_list = [] # authors article_id = 0 for row in self.rows: pmid_isbn = row['pmid_isbn'].replace('-','') pmcid = row['pmcid'] if len(pmcid) == 0: pmcid = None nihmsid = row['nihmsid'] if len(nihmsid) == 0: nihmsid = None doi = row['doi'] if len(doi) == 0: doi = None try: open_access = int(row['open_access']) except ValueError: open_access = None title = row['title'] if len(title) == 0: title = None publication = row['publication'] if len(publication) == 0: publication = None volume = row['volume'] if len(volume) == 0: volume = None issue = row['issue'] if len(issue) == 0: issue = None try: first_page = int(row['first_page']) except ValueError: first_page = None try: last_page = int(row['last_page']) except ValueError: last_page = None year = row['year'] if len(year) == 0: year = None try: citation_count = int(row['citation_count']) except ValueError: citation_count = None row_object = Article( pmid_isbn = pmid_isbn, pmcid = pmcid, nihmsid = nihmsid, doi = doi, open_access = open_access, title = title, publication = publication, volume = volume, issue = issue, first_page = first_page, last_page = last_page, year = year, citation_count = citation_count ) # check for dups in article.csv and only continue processing if new saw_article = 0 for pmid_isbn_read, first_page_read in zip(pmid_isbn_reads, first_page_reads): if (pmid_isbn_read == pmid_isbn) and (first_page_read == first_page): saw_article = 1 if saw_article == 0: row_object.save() article_id = article_id + 1 pmid_isbn_reads.append(pmid_isbn) first_page_reads.append(first_page) # article_to_author auth_string = row['authors'] auth_list = auth_string.split(',') author_pos = 0 for auth in auth_list: name = auth.strip() if name not in name_list: row_object = Author( name = name ) row_object.save() name_list.append(name) # ArticleAuthorRel row_object = 
ArticleAuthorRel( Author_id = name_list.index(name) + 1, Article_id = article_id, author_pos = author_pos ) row_object.save() author_pos = author_pos + 1 #end for auth in auth_list: #end if saw_article == 0: # end for row in self.rows: #end def article_to_article(self): # and article_to_author # ingests attachment_morph.csv, attachment_marker.csv, attachment_ephys.csv and populates Attachment, FragmentTypeRel def attachment_to_attachment(self): is_attachment_morph_csv = 0 for row in self.rows: # is this an attachment_morph.csv file or not try: priority = row['Representative?'] is_attachment_morph_csv = 1 except Exception: is_attachment_morph_csv = 0 break self.f.seek(0) # rewind the file self.rows = DictReader(self.f) for row in self.rows: try: # set cell_identifier cell_identifier = int(row['Cell Identifier']) # set quote_reference_id try: quote_reference_id = int(row['Quote reference id']) except ValueError: quote_reference_id = None # set name_of_file_containing_figure name_of_file_containing_figure = row['Name of file containing figure'] if len(name_of_file_containing_figure) == 0: name_of_file_containing_figure = None # set figure_table figure_table = row['Figure/Table'] if is_attachment_morph_csv == 1: if figure_table == 'figure': figure_table = 'morph_figure' elif figure_table == 'table': figure_table = 'morph_table' # set parameter parameter = None try: parameter = row['Parameter'].strip() if len(parameter) == 0: parameter = None else: # map Attachment parameter value to Fragment parameter value parameters_attachment = ('Vrest', 'Rin', 'tau', 'Vthresh', 'fAHP', 'APamplitude', 'APwidth', 'maxFR', 'sAHP', 'sag') parameters_fragment = ('Vrest', 'Rin', 'tau', 'Vthresh', 'fAHP', 'APampl' , 'APwidth', 'maxFR', 'sAHP', 'sag') p = 0 for parameter_attachment in parameters_attachment: if parameter == parameter_attachment: parameter = parameters_fragment[p] break else: p = p + 1 except Exception: parameter = None # set protocol_tag try: protocol_tag = row['Protocol_tag'].strip() if len(protocol_tag) == 0: protocol_tag = None except Exception: protocol_tag = None # set interpretation_notes try: interpretation_notes = row['Interpretation notes figures'].strip() if len(interpretation_notes) == 0: interpretation_notes = None except Exception: interpretation_notes = None # write Attachment record row_object = Attachment( cell_id = cell_identifier, original_id = quote_reference_id, name = name_of_file_containing_figure, type = figure_table, parameter = parameter, protocol_tag = protocol_tag, interpretation_notes = interpretation_notes ) row_object.save() # write FragmentTypeRel row if is_attachment_morph_csv == 1: priority = row['Representative?'] row_object = None if priority == '1': row_object = FragmentTypeRel(Type_id=cell_identifier,priority=1) else: row_object = FragmentTypeRel(Type_id=cell_identifier,priority=None) row_object.save() else: priority = None except ValueError: cell_identifier = None # ingests known_connections.csv and populates TypeTypeRel def connection_to_connection(self): for row in self.rows: try: Type1_id = int(row['Source class identity']) Type2_id = int(row['Target class identity']) except ValueError: continue connection_status_string = row['Connection?'] connection_status = 'negative' if connection_status_string == '1': connection_status = 'positive' connection_location_string = row['Target layer'] #connection_locations = connection_location_string.split(',') # was using ',' separator in original version of known_connections.csv connection_locations = 
connection_location_string.split(';') # now using ';' separator in new version of known_connections.csv for connection_location in connection_locations: try: row_object = TypeTypeRel.objects.get(Type1_id=Type1_id,Type2_id=Type2_id,connection_status=connection_status,connection_location=connection_location.strip()) except TypeTypeRel.DoesNotExist: row_object = TypeTypeRel(Type1_id=Type1_id,Type2_id=Type2_id,connection_status=connection_status,connection_location=connection_location.strip()) row_object.save() # ingests conndata.csv and populate Conndata table def conndata_to_conndata(self): row_num=1 # starting header offset for row in self.rows: row_num=row_num+1 try: Type1_id = int(row['Source_ID']) Type2_id = int(row['Target_ID']) connection_location_string=row['Layers'].strip() connection_status_string = row['Connection?'].strip() reference_id_string=row['RefIDs'].strip() except ValueError: continue if connection_status_string == '1': connection_status = 'positive' elif connection_status_string == '0': connection_status = 'negative' elif connection_status_string == '4': connection_status = 'positive' elif connection_status_string == '5': connection_status = 'positive' elif len(connection_status_string)!=0: try: row_object = ingest_errors.objects.get(field='Connection?',value=connection_status_string,filename='conndata.csv',file_row_num=row_num,comment='invalid connection value') except ingest_errors.DoesNotExist: row_object = ingest_errors(field='Connection?',value=connection_status_string,filename='conndata.csv',file_row_num=row_num,comment='invalid connection value') row_object.save() continue else: continue connection_locations = connection_location_string.split(';') references= reference_id_string.split(';') for connection_location in connection_locations: if len(connection_location)!=0: connection_location=connection_location.strip() # if not exists create connection try: row_object = Conndata.objects.get(Type1_id=Type1_id,Type2_id=Type2_id,connection_status=connection_status,connection_location=connection_location) except Conndata.DoesNotExist: row_object = Conndata(Type1_id=Type1_id,Type2_id=Type2_id,connection_status=connection_status,connection_location=connection_location) row_object.save() Connection_id=row_object.id for reference in references: if len(reference)!=0: reference=reference.strip() ConnFragment_id=None # if reference is not a number if not(reference.isdigit()): try: row_object = ingest_errors.objects.get(field='RefIDs',value=reference,filename='conndata.csv',file_row_num=row_num,comment='invalid reference value') except ingest_errors.DoesNotExist: row_object = ingest_errors(field='RefIDs',value=reference,filename='conndata.csv',file_row_num=row_num,comment='invalid reference value') row_object.save() continue # find whether given reference id exists in the database. try: row_object = ConnFragment.objects.get(original_id=reference) ConnFragment_id=row_object.id except ConnFragment.DoesNotExist: try: row_object = ingest_errors.objects.get(field='RefIDs',value=reference,filename='conndata.csv',file_row_num=row_num,comment='missing reference in conn_fragment.csv') except ingest_errors.DoesNotExist: row_object = ingest_errors(field='RefIDs',value=reference,filename='conndata.csv',file_row_num=row_num,comment='missing reference in conn_fragment.csv') row_object.save() # Add mapping between connection and reference. 
If reference not found skip that mapping if ConnFragment_id!=None: try: row_object = ConndataFragmentRel.objects.get(Conndata_id=Connection_id,ConnFragment_id=ConnFragment_id) except ConndataFragmentRel.DoesNotExist: row_object = ConndataFragmentRel(Conndata_id=Connection_id,ConnFragment_id=ConnFragment_id) row_object.save() # ingests conn_fragment.csv and populates ArticleEvidenceRel, Evidence, EvidenceFragmentRel, ConnFragment tables def connfragment_to_connfragment(self): row_num=1 # starting header offset row_object = EvidenceFragmentRel.objects.last() fragment_id = row_object.Fragment_id + 1 # initialize from last fragment entry for row in self.rows: row_num=row_num+1 # set reference_id reference_id = None location_in_reference = None quote = None pmid_isbn = None article_id = None ref_id=row['RefID'].strip() try: reference_id = int(ref_id) except Exception: if len(ref_id)!=0: try: row_object = ingest_errors.objects.get(field='RefID',value=ref_id,file_row_num=row_num,filename='conn_fragment.csv') except ingest_errors.DoesNotExist: row_object = ingest_errors(field='RefID',value=ref_id,filename='conn_fragment.csv',file_row_num=row_num,comment='invalid reference value') row_object.save() continue try: quote = row['Quote'] if len(quote) == 0: quote = None except Exception: quote = None try: location_in_reference = row['Location'] if len(location_in_reference) == 0: location_in_reference = None except Exception: location_in_reference = None pmid_isbn_value=row['PMID/ISBN'].strip() try: pmid_isbn = int(pmid_isbn_value.replace('-','')) except Exception: try: row_object = ingest_errors.objects.get(field='PMID/ISBN',value=pmid_isbn_value,file_row_num=row_num,filename='conn_fragment.csv') except ingest_errors.DoesNotExist: row_object = ingest_errors(field='PMID/ISBN',value=pmid_isbn_value,filename='conn_fragment.csv',file_row_num=row_num,comment='invalid pmid/isbn value') row_object.save() pmid_isbn = None if pmid_isbn == None: article_id = None else: try: row_object = Article.objects.filter(pmid_isbn=pmid_isbn).order_by('id').first() except Article.DoesNotExist: article_id = None if row_object == None: article_id = None else: article_id = row_object.id if (article_id == None and pmid_isbn!= None): # write new pmid_isbn to article_not_found try: row_object = article_not_found.objects.get(pmid_isbn=pmid_isbn_value) except article_not_found.DoesNotExist: row_object = article_not_found(pmid_isbn=pmid_isbn_value) row_object.save() # set Fragment try: row_object = ConnFragment.objects.get(original_id=reference_id) continue except ConnFragment.DoesNotExist: row_object = ConnFragment( id = fragment_id, original_id = reference_id, quote = quote, page_location = location_in_reference, pmid_isbn = pmid_isbn, ) row_object.save() fragment_id=row_object.id row_object = Evidence() row_object.save() Evidence_id =row_object.id row_object = EvidenceFragmentRel( Evidence_id = Evidence_id, Fragment_id = fragment_id ) row_object.save() row_object = ArticleEvidenceRel( Article_id = article_id, Evidence_id = Evidence_id ) row_object.save() fragment_id = fragment_id + 1 # end set fragment # ingests epdata.csv and populates ArticleEvidenceRel, ArticleSynonymRel, Epdata, EpdataEvidenceRel, Evidence, EvidenceEvidenceRel, EvidenceFragmentRel, EvidencePropertyTypeRel, Fragment, Property def epdata_to_epdata(self): EpdataPropertyRecords.save() for row in self.rows: try: EpdataStringField.parse_and_save(row) except Exception: break # ingests morph_fragment.csv, marker_fragment.csv, ep_fragment.csv and populates 
ArticleEvidenceRel, Evidence, EvidenceFragmentRel, Fragment, FragmentTypeRel(updates Fragment_id field) def fragment_to_fragment(self): fragment_id = 1 for row in self.rows: # is this a morph_fragment.csv file or a marker_fragment.csv file is_morph_fragment_csv = 0 saw_protocol_reference = 0 saw_ephys_parameters_extracted = 0 try: protocol_reference = row['Protocol Reference'] saw_protocol_reference = 1 row_object = EvidenceFragmentRel.objects.last() fragment_id = row_object.Fragment_id + 1 # initialize from last morph_fragment.csv entry except Exception: try: ephys_parameters_extracted = row['Ephys Parameters Extracted'] saw_ephys_parameters_extracted = 1 row_object = EvidenceFragmentRel.objects.last() fragment_id = row_object.Fragment_id + 1 # initialize from last morph_fragment.csv entry except Exception: is_morph_fragment_csv = 1 row_object = Evidence() row_object.save() fragment_id = 1 break self.f.seek(0) # rewind the file self.rows = DictReader(self.f) for row in self.rows: fragment_id = FragmentStringField.parse_and_save(row,fragment_id,saw_protocol_reference,saw_ephys_parameters_extracted) #end for row in self.rows: # conditionally update Fragment_id fields in FragmentTypeRel if is_morph_fragment_csv == 1: FragmentTypeRel_row_objects = FragmentTypeRel.objects.all() for FragmentTypeRel_row_object in FragmentTypeRel_row_objects: try: row_object = Attachment.objects.get(id=FragmentTypeRel_row_object.id) original_id = row_object.original_id row_object = Fragment.objects.get(original_id=original_id) Fragment_id = row_object.id row_object = FragmentTypeRel.objects.filter(id=FragmentTypeRel_row_object.id).update(Fragment_id=Fragment_id) except Fragment.DoesNotExist: row_object = None #end conditionally update Fragment_id fields in FragmentTypeRel #end def fragment_to_fragment(self): # ingests markerdata.csv and populates ArticleSynonymRel, Evidence, EvidenceEvidenceRel, EvidenceMarkerdataRel, EvidencePropertyTypeRel, Markerdata, Property def markerdata_to_markerdata(self): count=0 for row in self.rows: try: MarkerdataStringField.parse_and_save(row,count) count=count+1 except Exception: break # ingests morphdata.csv and populates ArticleSynonymRel, EvidencePropertyTypeRel, Property def morphdata_to_morphdata(self): #intial lines skipped still actual rows count=9 MorphdataPropertyRecords.save() for row in self.rows: try: MorphdataStringField.parse_and_save(row,count) count=count+1 except Exception: break # ingests notes.csv and populates Type(updates notes field) def notes_to_type(self): module_dir = os.path.dirname(__file__) # get current directory #notes_csv = self.request.FILES['file'].name #notes_csv_split = notes_csv.split('.') #notes_folder_name = notes_csv_split[0] notes_folder_name = 'packet_notes' notes_folder_path = os.path.join(module_dir, notes_folder_name) for row in self.rows: unique_ID = row['unique ID'] try: Type_id = int(unique_ID) except ValueError: Type_id = None notes_file = row['Notes file'] if notes_file != None: if len(notes_file) >= len('nnnn.txt'): notes_folder_path_notes_file = notes_folder_path + '/' + notes_file #example before: notes_folder_path_notes_file = '/Users/djh/wd/krasnow/csv2db/lib/packet_notes/1000.txt' notes_folder_path_notes_file = re.sub(r'csv2db/lib', r'static/csv2db/dat', notes_folder_path_notes_file) #example after : notes_folder_path_notes_file = '/Users/djh/wd/krasnow/static/csv2db/dat/packet_notes/1000.txt' try: fs = codecs.open(notes_folder_path_notes_file, 'r', 'utf-8') notes_txt = fs.read() fs.close() row_object = 
Type.objects.filter(id=Type_id).update(notes=notes_txt) except Type.DoesNotExist: row_object = None # ingests onhold_types_pmids.csv and populates Onhold def onhold_to_onhold(self): count=2 for row in self.rows: subregion = None type_id = None pmid_isbn = None name = None try: subregion = row['Subregion'] #type id try: type_id = int(row['Unique ID']) except ValueError: type_id=None row_object = ingest_errors(field='Unique ID',value=row['Unique ID'],filename='onhold_types_pmids.csv',file_row_num=count,comment='invalid Unique id value') row_object.save() continue # pmid isbn try: name = row['Type'].strip() pmid = row['PMID'].strip() pmid_isbn=int(pmid.replace('-','')) #check if article exists for this pmid try: count_ids = Article.objects.filter(pmid_isbn=pmid_isbn).order_by('id').count() except Article.DoesNotExist: try: row_object = article_not_found.objects.get(pmid_isbn=pmid_isbn) except article_not_found.DoesNotExist: row_object = article_not_found(pmid_isbn=pmid_isbn) row_object.save() if count_ids==0: try: row_object = article_not_found.objects.get(pmid_isbn=pmid_isbn) except article_not_found.DoesNotExist: row_object = article_not_found(pmid_isbn=pmid_isbn) row_object.save() except ValueError: pmid_isbn=None row_object = ingest_errors(field='PMID',value=row['PMID'],filename='onhold_types_pmids.csv',file_row_num=count,comment='invalid PMID value') row_object.save() continue row_object = Onhold( Type_id = type_id, subregion = subregion, pmid_isbn = pmid_isbn, name = name ) row_object.save() except Exception as e: row_object = ingest_errors(field='',value='',filename='onhold_types_pmids.csv',file_row_num=count,comment=str(e)) row_object.save() count=count+1 # ingests synonym.csv and populates Synonym, SynonymTypeRel def synonym_to_synonym(self): for row in self.rows: cited_names = row['Cited names'] if len(cited_names) == 0: cited_names = None try: unique_id = int(row['Unique ID']) except ValueError: unique_id = None row_object = Synonym( name = cited_names, cell_id = unique_id ) row_object.save() # write SynonymTypeRel record Synonym_id = row_object.id Type_id = row_object.cell_id row_object = SynonymTypeRel(Synonym_id=Synonym_id,Type_id=Type_id) row_object.save() # ingests term.csv and populates Term def term_to_term(self): for row in self.rows: parent = row['Parent'] if len(parent) == 0: parent = None concept = row['Concept'] if len(concept) == 0: concept = None term = row['Term'] if len(term) == 0: term = None try: resource_rank = int(row['Resource Rank']) except ValueError: resource_rank = None resource = row['Resource'] if len(resource) == 0: resource = None portal = row['Portal'] if len(portal) == 0: portal = None repository = row['Repository'] if len(repository) == 0: repository = None unique_id = row['Unique ID'] if len(unique_id) == 0: unique_id = None definition_link = row['Definition Link'] if len(definition_link) == 0: definition_link = None definition = row['Definition'] if len(definition) == 0: definition = None protein_gene = row['protein_gene'] if len(protein_gene) == 0: protein_gene = None human_rat = row['human_rat'] if len(human_rat) == 0: human_rat = None control = row['control'] if len(control) == 0: control = None row_object = Term( parent = parent, concept = concept, term = term, resource_rank = resource_rank, resource = resource, portal = portal, repository = repository, unique_id = unique_id, definition_link = definition_link, definition = definition, protein_gene = protein_gene, human_rat = human_rat, control = control ) row_object.save() # ingests type.csv 
and populates Type(all but notes field) def type_to_type(self, dev): for row in self.rows: status = row['status'] if status == 'active': id = int(row['id']) try: position = int(row['position']) position_HC_standard = int(row['position_HC_standard']) except ValueError: position = None position_HC_standard = None explanatory_notes = row['explanatory_notes'] if len(explanatory_notes) == 0: explanatory_notes = None subregion = row['subregion'] if len(subregion) == 0: subregion = None full_name = row['full_name'] if len(full_name) == 0: full_name = None intermediate_name = row['intermediate_name'] if len(intermediate_name) == 0: intermediate_name = None short_name = row['short_name'] if len(short_name) == 0: short_name = None if dev == 'true': # override for dev site position = position_HC_standard short_name = intermediate_name excit_inhib = row['excit_inhib'] notes = None try: row_object = Type.objects.get(id=id) row_object = Type.objects.filter(id=id).update(position=position,nickname=short_name) except Type.DoesNotExist: row_object = Type( id = id, position = position, explanatory_notes = explanatory_notes, subregion = subregion, name = full_name, nickname = short_name, excit_inhib = excit_inhib, status = status, notes = notes ) row_object.save()
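Several of the ingest methods above (attachment_to_attachment, fragment_to_fragment) first peek at a row to work out which CSV variant they were handed, then rewind the file and rebuild the DictReader before the real pass, since a DictReader cannot be restarted in place. A minimal standalone sketch of that detect-then-rewind pattern, using the plain csv module and a hypothetical in-memory file rather than the Django models above:

import csv
from io import StringIO

f = StringIO("Cell Identifier,Representative?\n1001,1\n1002,\n")

# First pass: read a single row just to detect the file variant.
rows = csv.DictReader(f)
is_morph_csv = False
for row in rows:
    # 'Representative?' is only present in the attachment_morph.csv variant.
    is_morph_csv = 'Representative?' in row
    break

# Rewind the underlying file and rebuild the reader for the full pass.
f.seek(0)
rows = csv.DictReader(f)
for row in rows:
    print(row['Cell Identifier'], is_morph_csv)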
class FileObjectPosix(object): """ A file-like object that operates on non-blocking files but provides a synchronous, cooperative interface. .. caution:: This object is most effective wrapping files that can be used appropriately with :func:`select.select` such as sockets and pipes. In general, on most platforms, operations on regular files (e.g., ``open('/etc/hosts')``) are considered non-blocking already, even though they can take some time to complete as data is copied to the kernel and flushed to disk (this time is relatively bounded compared to sockets or pipes, though). A :func:`~os.read` or :func:`~os.write` call on such a file will still effectively block for some small period of time. Therefore, wrapping this class around a regular file is unlikely to make IO gevent-friendly: reading or writing large amounts of data could still block the event loop. If you'll be working with regular files and doing IO in large chunks, you may consider using :class:`~gevent.fileobject.FileObjectThread` or :func:`~gevent.os.tp_read` and :func:`~gevent.os.tp_write` to bypass this concern. .. note:: Random read/write (e.g., ``mode='rwb'``) is not supported. For that, use :class:`io.BufferedRWPair` around two instance of this class. .. tip:: Although this object provides a :meth:`fileno` method and so can itself be passed to :func:`fcntl.fcntl`, setting the :data:`os.O_NONBLOCK` flag will have no effect; however, removing that flag will cause this object to no longer be cooperative. .. versionchanged:: 1.1 Now uses the :mod:`io` package internally. Under Python 2, previously used the undocumented class :class:`socket._fileobject`. This provides better file-like semantics (and portability to Python 3). """ #: platform specific default for the *bufsize* parameter default_bufsize = io.DEFAULT_BUFFER_SIZE def __init__(self, fobj, mode='rb', bufsize=-1, close=True): """ :keyword fobj: Either an integer fileno, or an object supporting the usual :meth:`socket.fileno` method. The file *will* be put in non-blocking mode using :func:`gevent.os.make_nonblocking`. :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb" (where the "b" or "U" can be omitted). If "U" is part of the mode, IO will be done on text, otherwise bytes. :keyword int bufsize: If given, the size of the buffer to use. The default value means to use a platform-specific default, and a value of 0 is translated to a value of 1. Other values are interpreted as for the :mod:`io` package. Buffering is ignored in text mode. """ if isinstance(fobj, int): fileno = fobj fobj = None else: fileno = fobj.fileno() if not isinstance(fileno, int): raise TypeError('fileno must be int: %r' % fileno) orig_mode = mode mode = (mode or 'rb').replace('b', '') if 'U' in mode: self._translate = True mode = mode.replace('U', '') else: self._translate = False if len(mode) != 1 and mode not in 'rw': # pragma: no cover # Python 3 builtin `open` raises a ValueError for invalid modes; # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was # enabled (which it usually was). Match Python 3 because it makes more sense # and because __debug__ may not be enabled. 
# NOTE: This is preventing a mode like 'rwb' for binary random access; # that code was never tested and was explicitly marked as "not used" raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,)) self._fobj = fobj self._closed = False self._close = close self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close) if bufsize < 0 or bufsize == 1: bufsize = self.default_bufsize elif bufsize == 0: bufsize = 1 if mode == 'r': self.io = BufferedReader(self.fileio, bufsize) else: assert mode == 'w' self.io = BufferedWriter(self.fileio, bufsize) #else: # QQQ: not used, not reachable # # self.io = BufferedRandom(self.fileio, bufsize) if self._translate: self.io = TextIOWrapper(self.io) @property def closed(self): """True if the file is closed""" return self._closed def close(self): if self._closed: # make sure close() is only run once when called concurrently return self._closed = True try: self.io.close() self.fileio.close() finally: self._fobj = None def flush(self): self.io.flush() def fileno(self): return self.io.fileno() def write(self, data): self.io.write(data) def writelines(self, lines): self.io.writelines(lines) def read(self, size=-1): return self.io.read(size) def readline(self, size=-1): return self.io.readline(size) def readlines(self, sizehint=0): return self.io.readlines(sizehint) def readable(self): """ .. versionadded:: 1.1b2 """ return self.io.readable() def writable(self): """ .. versionadded:: 1.1b2 """ return self.io.writable() def seek(self, *args, **kwargs): return self.io.seek(*args, **kwargs) def seekable(self): return self.io.seekable() def tell(self): return self.io.tell() def truncate(self, size=None): return self.io.truncate(size) def __iter__(self): return self.io def __getattr__(self, name): # XXX: Should this really be _fobj, or self.io? # _fobj can easily be None but io never is return getattr(self._fobj, name)
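A minimal usage sketch for the class above, assuming it is importable as gevent.fileobject.FileObjectPosix (the exact import path and text-mode behaviour vary across gevent releases). It wraps the read end of a pipe in 'rU' mode so the TextIOWrapper branch is exercised and iteration yields str lines:

import os
import gevent
from gevent.fileobject import FileObjectPosix

r_fd, w_fd = os.pipe()

def writer():
    # Plain os.write on the write end; a small payload will not fill the pipe buffer.
    os.write(w_fd, b"hello\nworld\n")
    os.close(w_fd)

g = gevent.spawn(writer)

# 'rU' strips to 'r' internally and wraps the BufferedReader in a TextIOWrapper,
# so iteration yields text lines while reads cooperate with the gevent hub.
reader = FileObjectPosix(r_fd, 'rU')
for line in reader:
    print(line.rstrip())
reader.close()
g.join()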
class FileObjectPosix(object): """ A file-like object that operates on non-blocking files. .. seealso:: :func:`gevent.os.make_nonblocking` """ default_bufsize = io.DEFAULT_BUFFER_SIZE def __init__(self, fobj, mode='rb', bufsize=-1, close=True): """ :param fobj: Either an integer fileno, or an object supporting the usual :meth:`socket.fileno` method. The file will be put in non-blocking mode. """ if isinstance(fobj, int): fileno = fobj fobj = None else: fileno = fobj.fileno() if not isinstance(fileno, int): raise TypeError('fileno must be int: %r' % fileno) orig_mode = mode mode = (mode or 'rb').replace('b', '') if 'U' in mode: self._translate = True mode = mode.replace('U', '') else: self._translate = False if len(mode) != 1: # Python 3 builtin `open` raises a ValueError for invalid modes; # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was # enabled (which it usually was). Match Python 3 because it makes more sense # and because __debug__ may not be enabled raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,)) self._fobj = fobj self._closed = False self._close = close self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close) if bufsize < 0: bufsize = self.default_bufsize if mode == 'r': if bufsize == 0: bufsize = 1 elif bufsize == 1: bufsize = self.default_bufsize self.io = BufferedReader(self.fileio, bufsize) elif mode == 'w': if bufsize == 0: bufsize = 1 elif bufsize == 1: bufsize = self.default_bufsize self.io = BufferedWriter(self.fileio, bufsize) else: # QQQ: not used self.io = BufferedRandom(self.fileio, bufsize) if self._translate: self.io = TextIOWrapper(self.io) @property def closed(self): """True if the file is closed""" return self._closed def close(self): if self._closed: # make sure close() is only run once when called concurrently return self._closed = True try: self.io.close() self.fileio.close() finally: self._fobj = None def flush(self): self.io.flush() def fileno(self): return self.io.fileno() def write(self, data): self.io.write(data) def writelines(self, lines): self.io.writelines(lines) def read(self, size=-1): return self.io.read(size) def readline(self, size=-1): return self.io.readline(size) def readlines(self, sizehint=0): return self.io.readlines(sizehint) def readable(self): return self.io.readable() def writable(self): return self.io.writable() def seek(self, *args, **kwargs): return self.io.seek(*args, **kwargs) def seekable(self): return self.io.seekable() def tell(self): return self.io.tell() def truncate(self, size=None): return self.io.truncate(size) def __iter__(self): return self.io def __getattr__(self, name): # XXX: Should this really be _fobj, or self.io? # _fobj can easily be None but io never is return getattr(self._fobj, name)
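As the comment in this revision's __init__ notes, the bare assert used by older releases was replaced with a ValueError, so an unsupported mode is rejected even when Python runs with assertions disabled. A small illustration under the same assumption that the class is available as gevent.fileobject.FileObjectPosix; a random-access mode such as 'rwb' is the documented unsupported case:

import os
from gevent.fileobject import FileObjectPosix

r_fd, w_fd = os.pipe()
try:
    # 'rwb' normalizes to 'rw', which is not a single-character mode,
    # so this revision raises ValueError before touching the descriptor.
    FileObjectPosix(r_fd, 'rwb')
except ValueError as err:
    print(err)  # mode can only be [rb, rU, wb], not 'rwb'
finally:
    os.close(r_fd)
    os.close(w_fd)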