def _download_data_set(self):
    """Fetch the data set file and its annotation file if they are absent.

    The directory that holds ``DATA_SET_FILE_PATH`` is created first, then
    each file is requested through ``download_file_if_not_exists`` with its
    URL, destination path and MD5 sum taken from the matching
    ``<PREFIX>_URL`` / ``<PREFIX>_FILE_PATH`` / ``<PREFIX>_MD5_SUM``
    attributes on ``self``.
    """
    target_dir = os.path.dirname(self.DATA_SET_FILE_PATH)
    make_sure_dir_exists(target_dir)

    # Same order as before: the data set itself, then the annotation.
    for prefix in ("DATA_SET", "ANNOTATION"):
        download_file_if_not_exists(
            getattr(self, prefix + "_URL"),
            getattr(self, prefix + "_FILE_PATH"),
            getattr(self, prefix + "_MD5_SUM"),
        )
def _download_data_set(self):
    """Fetch every RNA / ADT / transformed-ADT file for PBMC, CBMC and CD8.

    ``DATA_SET_DIR_NAME`` is created first. Each file is then requested
    through ``download_file_if_not_exists`` using the
    ``<SAMPLE>_<MODALITY>_DATA_URL``, ``..._DATA_FILE_PATH`` and
    ``..._DATA_MD5_SUM`` attributes on ``self``.
    """
    make_sure_dir_exists(self.DATA_SET_DIR_NAME)

    # Outer loop over samples, inner loop over modalities, so the files are
    # requested in the order PBMC -> CBMC -> CD8, each as
    # RNA -> ADT -> TRANSFORMED_ADT.
    for sample in ("PBMC", "CBMC", "CD8"):
        for modality in ("RNA", "ADT", "TRANSFORMED_ADT"):
            stem = sample + "_" + modality + "_DATA"
            download_file_if_not_exists(
                getattr(self, stem + "_URL"),
                getattr(self, stem + "_FILE_PATH"),
                getattr(self, stem + "_MD5_SUM"),
            )
def save_model_and_outputs():
    """Write the trained model and its per-feature outputs to ``args.output_dir``.

    Reads ``args``, ``model``, ``data_set`` and ``collate_function`` from the
    surrounding scope. Steps:

    1. Replace ``args.output_dir`` with its absolute path and create it.
    2. Unless ``args.ignore_model`` is set, dump ``model.get_full_state_dict()``
       to ``model.pkl.gz``.
    3. Unless ``args.ignore_output`` is set, run the model over ``data_set``
       and write each available feature to ``<feature>.csv.gz`` as a table
       indexed by ``data_set.genes`` with ``data_set.cells`` as columns,
       rounded to 3 decimals. With ``args.mean_only`` only ``"mean"`` is
       written; otherwise ``"mean"``, ``"r"``, ``"pi"`` and
       ``"normal_mean_nodes"`` are written when present in the outputs.
    4. If ``args.debug`` is set, drop into an IPython shell.
    """
    args.output_dir = os.path.abspath(args.output_dir)
    make_sure_dir_exists(args.output_dir)
    log("Saving output results to {}".format(args.output_dir))

    if not args.ignore_model:
        dump_gzip_pickle(model.get_full_state_dict(),
                         os.path.join(args.output_dir, "model.pkl.gz"))

    if not args.ignore_output:
        # shuffle=False / drop_last=False: every cell is visited once, in
        # data set order, so outputs can be labelled with data_set.cells.
        data_loader = DataLoader(dataset=data_set,
                                 batch_size=args.batch_size,
                                 num_workers=4,
                                 collate_fn=collate_function,
                                 shuffle=False,
                                 drop_last=False)
        all_outputs = get_output(model, data_loader)

        # Previously the mean-only list was assigned but never read, so the
        # flag had no effect; the loop now iterates the selected list.
        if args.mean_only:
            features = ["mean"]
        else:
            features = ["mean", "r", "pi", "normal_mean_nodes"]

        for feature in features:
            if feature not in all_outputs:
                continue
            print("Saving %s ..." % feature)
            # Transposed so that rows are genes and columns are cells.
            output = pd.DataFrame(all_outputs[feature].transpose(),
                                  index=data_set.genes,
                                  columns=data_set.cells)
            output = output.round(3)
            write_csv(output,
                      os.path.join(args.output_dir,
                                   "{}.csv.gz".format(feature)))

    # NOTE(review): placed at function level so the debug shell opens even
    # when outputs are skipped — confirm this matches the intended nesting.
    if args.debug:
        import IPython
        IPython.embed()