def test_save(self):
    """Check that an AtomsCollection survives a save/load round trip.

    A one-structure collection carrying a 'test' array is saved to
    'collection.pkl' inside _TEST_DIR, loaded back, and the array value is
    compared with the one that was stored.
    """

    # Test saving and loading collection
    testcoll = AtomsCollection([Atoms('H')])
    testcoll.set_array('test', np.array([2]))

    outf = os.path.join(_TEST_DIR, 'collection.pkl')
    testcoll.save(outf)

    try:
        # Reload
        testcoll = AtomsCollection.load(outf)
        self.assertEqual(testcoll.get_array('test')[0], 2)
    finally:
        # Remove the file even when loading or the assertion fails, so a
        # failed run does not leave artefacts in the test directory
        os.remove(outf)
# Read the command line
args = parser.parse_args()

# The gene definitions are read from "<seedname>.gene"
gene_list = load_genefile(args.seedname + '.gene')

# Build the AtomsCollection, either by merging previously saved collections
# or directly from the input files
if args.pkl:
    aC = AtomsCollection([])
    for pkl_path in args.input_files:
        aC += AtomsCollection.load(pkl_path)
else:
    aC = AtomsCollection(args.input_files,
                         info={'name': args.seedname},
                         cell_reduce=True,
                         progress=True)

# Keep only the n lowest energy structures if required
if args.n is not None:
    # The energies must be stored as an array before sorting on them
    CalcEnergy.get(aC, store_array=True)
    aC = aC.sorted_byarray('calc_energy')[:args.n]

#discard_Z = args.Z is not None

# Normalisation settings used for the genomes
if args.gene_clamp:
    nrange = (0, 1)
    ndist = 1.0
else:
    nrange = (None, None)
    ndist = None

# Calculate genomes
pC = PhylogenCluster(aC, genes=[], norm_range=nrange, norm_dist=ndist)
pC.set_genes(gene_list, load_arrays=True)
def __main__():
    """Command line entry point: compute and export genomes for structures.

    Parses the command line, loads the gene list from "<seedname>.gene",
    builds an AtomsCollection from the input files (or from saved
    collections when -pkl is given), optionally keeps only the -n lowest
    energy structures, evaluates the genes through a PhylogenCluster and
    writes the requested output files.

    Raises:
        RuntimeError: if -genomes is requested and a structure has no 'name'
            entry in its info dictionary.
    """

    # So, first parse the command line arguments
    parser = ap.ArgumentParser()
    # Main argument
    parser.add_argument('seedname', type=str, default=None,
                        help="Seedname of the job")
    parser.add_argument('input_files', type=str, nargs='+', default=None,
                        help="Files to load and analyse")
    # Optional arguments
    # Boolean args
    parser.add_argument('-pkl', action='store_true', default=False,
                        help="Input is one or more AtomsCollections"
                        " in PICKLE format")
    parser.add_argument('-tree', action='store_true', default=False,
                        help="Create JSON tree file")
    parser.add_argument('-distmat', action='store_true', default=False,
                        help="Save a distance matrix file")
    parser.add_argument('-forcemat', action='store_true', default=False,
                        help="Create JSON distance matrix file for force "
                        "layout")
    parser.add_argument('-genomes', action='store_true', default=False,
                        help="Save genomes text file")
    parser.add_argument('-gene_png', action='store_true', default=False,
                        help="Save genomes png file")
    parser.add_argument('-gene_clamp', action='store_true', default=True,
                        help="Clamp genes between 0 and 1")
    # -gene_clamp is a store_true flag that already defaults to True, so on
    # its own it can never switch clamping off. This companion flag writes
    # False to the same destination; the default behaviour is unchanged.
    parser.add_argument('-no_gene_clamp', dest='gene_clamp',
                        action='store_false',
                        help="Do not clamp genes between 0 and 1")
    # Value args
    parser.add_argument('-n', type=int, default=None,
                        help="Number of lowest energy structures to keep")
    # parser.add_argument('-Z', type=int, default=None,
    #     help="Expected numbers of molecules in cell (discard structures "
    #     "that don't agree)")
    parser.add_argument('-corrmat_minforce', type=float, default=0.01,
                        help="Correlation matrix minimum force")
    parser.add_argument('-corrmat_maxforce', type=float, default=1.0,
                        help="Correlation matrix maximum force")
    parser.add_argument('-savepkl', type=str, default=None,
                        help="Directory to save collection as pickle file")
    # NOTE(review): -distmat, -forcemat and the -corrmat_* values are parsed
    # but not used in this part of the function - confirm they are handled
    # further down.

    args = parser.parse_args()

    # First, parse the gene list
    gene_list = load_genefile(args.seedname + '.gene')

    # Then create an AtomsCollection
    if not args.pkl:
        aC = AtomsCollection(args.input_files,
                             info={'name': args.seedname},
                             cell_reduce=True,
                             progress=True)
    else:
        # Merge all the saved collections into a single one
        aC = AtomsCollection([])
        for f in args.input_files:
            aC += AtomsCollection.load(f)

    # Take only n lowest energies if required
    if args.n is not None:
        # Sort by energy
        CalcEnergy.get(aC, store_array=True)
        aC = aC.sorted_byarray('calc_energy')[:args.n]

    #discard_Z = args.Z is not None

    # Calculate genomes
    nrange = (0, 1) if args.gene_clamp else (None, None)
    ndist = 1.0 if args.gene_clamp else None
    pC = PhylogenCluster(aC, genes=[], norm_range=nrange, norm_dist=ndist)
    pC.set_genes(gene_list, load_arrays=True)

    if args.savepkl is not None:
        # Save the structure
        pC.save_collection(os.path.join(args.savepkl,
                                        args.seedname + '.pkl'))

    # And now for the output
    if args.genomes:
        genvec, legend = pC.get_genome_vectors()

        # Collect the names before touching the output file, so that a
        # nameless structure does not leave a half-written table behind
        try:
            names = [aC.structures[i].info['name']
                     for i in range(len(genvec))]
        except KeyError:
            # Someone has no name!
            raise RuntimeError('Not all structures have a name, '
                               'can\'t print out genomes as table')

        # Create a header: one "<l[0]>_<index>" column per legend entry,
        # with indices running from 1 to l[1]
        gene_columns = ['\t'.join(['{0}_{1}'.format(l[0], i + 1)
                                   for i in range(l[1])])
                        for l in legend]
        header = ('Structure\t'
                  '{0}\n').format('\t'.join(gene_columns))

        with open(args.seedname + "_genomes.dat", 'w') as ofile:
            ofile.write(header)
            # NOTE(review): the header is tab separated but the values in
            # each row are space separated; left as is because existing
            # readers may rely on it - confirm which one is intended.
            for name, gen in zip(names, genvec):
                ofile.write(name + '\t' +
                            ' '.join([str(x) for x in gen]) + '\n')

    # The png output is skipped when _has_matplotlib is false
    if args.gene_png and _has_matplotlib:
        # PNG is a binary format, so the file must be opened in binary mode
        with open(args.seedname + "_genomes.png", 'wb') as ofile:
            genomes2png(aC, pC, ofile)

    if args.tree:
        # The returned tree is not used here; the call is kept in case it
        # has side effects on pC - TODO confirm and drop if it does not
        pC.get_hier_tree()
        with open(args.seedname + "_tree.json", 'w') as ofile:
            json.dump(tree2json(aC, pC), ofile)