def prepare_argument_path(
        self, argument_noun, raw_arguments, draft_folder, default_path):
    """Put the file for one argument into draft_folder.

    The client can supply the argument in several ways, checked in order:

    1. direct content under ``<noun>_<format>`` (e.g. x_table_csv)
    2. multipart content (a value that has a ``file`` attribute)
    3. an empty string, meaning "use default_path"
    4. a relative result path containing '/' (e.g. x_table=11/x/y.csv)
    5. an upload id (e.g. x_table=x)

    Returns whatever the helper that placed the file returns
    (presumably the path inside draft_folder -- confirm against helpers).

    Raises KeyError if the noun is missing from raw_arguments, or if the
    value is empty and no default_path is available.
    """
    data_type = get_data_type(argument_noun)

    def get_draft_path(reference_path):
        # Name the draft file after the noun, reusing the reference extension
        extension = get_file_extension(reference_path)
        return join(draft_folder, argument_noun + extension)

    # Direct content: the first format present in raw_arguments wins
    for extension in data_type.formats:
        content_key = '%s_%s' % (argument_noun, extension)
        if content_key in raw_arguments:
            content = raw_arguments[content_key]
            file_name = '%s.%s' % (argument_noun, extension)
            return copy_text(join(draft_folder, file_name), content)

    # A missing noun surfaces as KeyError from this lookup
    value = raw_arguments[argument_noun]

    # Multipart content
    if hasattr(value, 'file'):
        return copy_file(get_draft_path(value.filename), value.file)

    # Empty content falls back to the default
    if value == '':
        if default_path:
            return link_path(get_draft_path(default_path), default_path)
        raise KeyError

    # Relative path into an existing result
    if '/' in value:
        result_path = self.get_file_path(*parse_result_relative_path(value))
        return link_path(get_draft_path(result_path), result_path)

    # Upload id: move the uploaded file, then discard the upload folder
    upload = get_upload(self, upload_id=value)
    upload_path = realpath(join(upload.folder, data_type.get_file_name()))
    draft_path = move_path(get_draft_path(upload_path), upload_path)
    remove_safely(upload.folder)
    return draft_path
def prepare_argument_path(
        self, argument_noun, raw_arguments, draft_folder, default_path):
    """Resolve one client argument to a file placed in draft_folder.

    Sources are tried in this order: direct content keyed as
    ``<noun>_<format>``, multipart content, the default path (when the
    value is an empty string), a relative result path, and finally an
    upload id.

    Returns the value produced by copy_text, copy_file, link_path or
    move_path, depending on the source.

    Raises KeyError when the noun is absent from raw_arguments or when an
    empty value is sent without a default_path.
    """
    data_type = get_data_type(argument_noun)

    # If client sent direct content (x_table_csv), save it
    for file_format in data_type.formats:
        text_key = '%s_%s' % (argument_noun, file_format)
        if text_key in raw_arguments:
            text = raw_arguments[text_key]
            text_name = '%s.%s' % (argument_noun, file_format)
            return copy_text(join(draft_folder, text_name), text)

    # Raise KeyError if client did not specify noun (x_table)
    value = raw_arguments[argument_noun]

    if hasattr(value, 'file'):
        # Multipart content keeps the extension of the uploaded file name
        file_name = argument_noun + get_file_extension(value.filename)
        return copy_file(join(draft_folder, file_name), value.file)

    if value == '':
        # Empty content means the default, which must exist
        if not default_path:
            raise KeyError
        file_name = argument_noun + get_file_extension(default_path)
        return link_path(join(draft_folder, file_name), default_path)

    if '/' in value:
        # Relative path (x_table=11/x/y.csv) points at an existing file
        linked_path = self.get_file_path(*parse_result_relative_path(value))
        file_name = argument_noun + get_file_extension(linked_path)
        return link_path(join(draft_folder, file_name), linked_path)

    # Anything else is treated as an upload id (x_table=x)
    upload = get_upload(self, upload_id=value)
    uploaded_path = join(upload.folder, data_type.get_file_name())
    file_name = argument_noun + get_file_extension(uploaded_path)
    moved_path = move_path(join(draft_folder, file_name), uploaded_path)
    # The upload folder is no longer needed once its file has been moved
    remove_safely(upload.folder)
    return moved_path
def save(Class, path, table):
    """Write table to path in the format selected by the path's extension.

    Supported extensions are .csv, .msg, .json, .xls and .xlsx.

    Raises DataTypeError if the extension is not supported.
    """
    if path.endswith(".csv"):
        table.to_csv(path, encoding="utf-8", index=False)
    elif path.endswith(".msg"):
        # NOTE(review): to_msgpack was removed in pandas 1.0
        table.to_msgpack(path, compress="blosc")
    elif path.endswith(".json"):
        # Use the "split" layout because the matching load() reads JSON
        # with orient="split"; the default layout cannot be read back
        table.to_json(path, orient="split")
    elif path.endswith((".xls", ".xlsx")):
        table.to_excel(path)
    else:
        raise DataTypeError(
            "file format not supported (%s)" % get_file_extension(path))
def save(Class, data_folder, owner_id, id_length, source_name, source_x):
    """Store an incoming file in a freshly spawned instance folder.

    The instance comes from Class.spawn(data_folder, id_length, owner_id).
    The original name is written to name.txt and the content is stored as
    'raw' plus the extension of source_name.

    Returns the instance with its name and path attributes set.
    """
    source_extension = get_file_extension(source_name)
    source_file = prepare_file(source_x)
    instance = Class.spawn(data_folder, id_length, owner_id)
    instance.name = source_name
    instance.path = join(instance.folder, 'raw' + source_extension)
    # Save name; the context manager closes the handle promptly
    with open(join(instance.folder, 'name.txt'), 'wt') as name_file:
        name_file.write(source_name)
    # Save content under a temporary name first, so that instance.path only
    # appears after the copy has finished
    temporary_path = join(instance.folder, 'temporary.bin')
    with open(temporary_path, 'wb') as temporary_file:
        copyfileobj(source_file, temporary_file)
    rename(temporary_path, instance.path)
    return instance
def load(Class, path):
    """Read a table from path, choosing the reader by file extension.

    Supported extensions are .csv, .msg, .json, .xls and .xlsx. CSV files
    are read as UTF-8 first; if that fails to decode, the encoding is
    taken from _get_encoding and the file is read again.

    Returns the table produced by the pandas reader.

    Raises IOError if path does not exist and DataTypeError if the
    extension is not supported.
    """
    if not exists(path):
        raise IOError
    if path.endswith(".csv"):
        try:
            table = pandas.read_csv(
                path, encoding="utf-8", skipinitialspace=True)
        except UnicodeDecodeError:
            # Read raw bytes: decoding as text here could raise the same
            # UnicodeDecodeError again. The with block closes the handle.
            with open(path, "rb") as csv_file:
                content = csv_file.read()
            encoding = _get_encoding(content)
            table = pandas.read_csv(
                path, encoding=encoding, skipinitialspace=True)
    elif path.endswith(".msg"):
        # NOTE(review): read_msgpack was removed in pandas 1.0
        table = pandas.read_msgpack(path)
    elif path.endswith(".json"):
        table = pandas.read_json(path, orient="split")
    elif path.endswith((".xls", ".xlsx")):
        table = pandas.read_excel(path)
    else:
        raise DataTypeError(
            "file format not supported (%s)" % get_file_extension(path))
    return table
def prepare_script_folder(target_folder, notebook, notebook_name):
    """Turn a notebook into a command-line script plus cc.ini configuration.

    Files referenced by arguments whose names end in '_path' are copied
    into target_folder and the argument values are replaced by the copied
    files' base names. The first notebook cell is overwritten with code
    that reads each argument from sys.argv, converting arguments whose
    names end in '_integer' with int().

    Returns target_folder.

    Raises CrossComputeError if the exported script is not a .py file.
    """
    tool_arguments = load_tool_arguments(notebook)
    # Prepare paths
    for k, v in tool_arguments.items():
        if not k.endswith('_path'):
            continue
        path = make_unique_path(target_folder, get_file_extension(v))
        shutil.copy(v, path)
        tool_arguments[k] = basename(path)
    # Prepare command-line script; argv[0] is the script itself, so the
    # arguments are numbered from one
    script_lines = ['from sys import argv']
    for i, arg in enumerate(tool_arguments, 1):
        if arg.endswith('_integer'):
            script_lines.append('%s = int(argv[%s])' % (arg, i))
        else:
            script_lines.append('%s = argv[%s]' % (arg, i))
    notebook.cells[0]['source'] = '\n'.join(script_lines)
    script_content, script_info = nbconvert.export_script(notebook)
    script_name = 'run' + script_info['output_extension']
    if script_name.endswith('.py'):
        command_name = 'python'
    else:
        raise CrossComputeError
    # Save script; the context manager closes the file deterministically
    script_path = join(target_folder, script_name)
    with codecs.open(script_path, 'w', encoding='utf-8') as script_file:
        script_file.write(script_content)
    # Save configuration
    configuration_path = join(target_folder, 'cc.ini')
    configuration_lines = []
    configuration_lines.append('[crosscompute %s]' % notebook_name)
    configuration_lines.append(
        'command_template = %s %s %s' % (
            command_name, script_name,
            ' '.join('{%s}' % x for x in tool_arguments).strip()))
    for k, v in tool_arguments.items():
        if k in RESERVED_ARGUMENT_NAMES:
            continue
        configuration_lines.append('%s = %s' % (k, v))
    with codecs.open(
            configuration_path, 'w', encoding='utf-8') as configuration_file:
        configuration_file.write('\n'.join(configuration_lines).strip())
    return target_folder
def prepare_script_folder(target_folder, notebook, notebook_name):
    """Turn a notebook into a command-line script plus cc.ini configuration.

    Files referenced by arguments whose names end in "_path" are copied
    into target_folder and the argument values are replaced by the copied
    files' base names. The first notebook cell is overwritten with code
    that unpacks the arguments from sys.argv.

    Returns target_folder.

    Raises CrossComputeError if the exported script is not a .py file.
    """
    tool_arguments = load_tool_arguments(notebook)
    # Prepare paths
    for k, v in tool_arguments.items():
        if not k.endswith("_path"):
            continue
        path = make_unique_path(target_folder, get_file_extension(v))
        shutil.copy(v, path)
        tool_arguments[k] = basename(path)
    # Prepare command-line script
    script_lines = ["from sys import argv"]
    if tool_arguments:
        # The trailing comma keeps the target a tuple, so a single argument
        # receives the value itself rather than the list argv[1:]; with no
        # arguments the line is skipped because it would be invalid syntax
        script_lines.append("%s, = argv[1:]" % ", ".join(tool_arguments))
    notebook.cells[0]["source"] = "\n".join(script_lines)
    script_content, script_info = nbconvert.export_script(notebook)
    script_name = "run" + script_info["output_extension"]
    if script_name.endswith(".py"):
        command_name = "python"
    else:
        raise CrossComputeError
    # Save script; the context manager closes the file deterministically
    script_path = join(target_folder, script_name)
    with codecs.open(script_path, "w", encoding="utf-8") as script_file:
        script_file.write(script_content)
    # Save configuration
    configuration_path = join(target_folder, "cc.ini")
    configuration_lines = []
    configuration_lines.append("[crosscompute %s]" % notebook_name)
    configuration_lines.append(
        "command_template = %s %s %s" % (
            command_name, script_name,
            " ".join("{%s}" % x for x in tool_arguments).strip()))
    for k, v in tool_arguments.items():
        if k in RESERVED_ARGUMENT_NAMES:
            continue
        configuration_lines.append("%s = %s" % (k, v))
    with codecs.open(
            configuration_path, "w", encoding="utf-8") as configuration_file:
        configuration_file.write("\n".join(configuration_lines).strip())
    return target_folder
def test_get_file_extension():
    """Check the extension reported for a file name with two suffixes."""
    expected_extension = '.txt.zip'
    assert get_file_extension('file.txt.zip') == expected_extension