from partition import Partition
from tag import Tag

if __name__ == '__main__':
    # Demo driver: exercises the segmenter (Partition) and the tagger (Tag)
    # against the bundled datasets. All paths are relative to the working
    # directory the script is launched from.
    MINI_CORPUS = './dataset/mini.txt'
    UNIGRAM_MODEL = './model/2014_corpus_train.1gram'
    BIGRAM_MODEL = './model/2014_corpus_train.2gram'
    SEGMENT_TESTSET = './dataset/2014_corpus_test1.txt'
    TAG_TRAINSET = './dataset/2014_corpus_train.txt'
    TAG_TESTSET = './dataset/2014_corpus_test.txt'

    # Segmenter built directly from the small corpus. The instance is not
    # used afterwards; it is kept bound so construction happens exactly as
    # before.
    mini_segmenter = Partition(corpus=MINI_CORPUS)

    # Segmenter built with trainNow=False and explicit 1-gram / 2-gram model
    # paths, then evaluated on the segmentation test set.
    segmenter = Partition(
        trainNow=False,
        model_1gram_path=UNIGRAM_MODEL,
        model_2gram_path=BIGRAM_MODEL,
    )
    segmenter.evaluate(SEGMENT_TESTSET)

    # Tagger built from the training corpus; tag one segmented sample
    # sentence, then evaluate on the tagging test set.
    tagger = Tag(corpus=TAG_TRAINSET)
    sample_words = segmenter.segment("今天是个好日子。")
    tagger.tag(sample_words)
    tagger.evaluate(testset_path=TAG_TESTSET)
record = TCSeqRec() record.dv = [(dv_matrix_strip_header, labels_strip_header)] record.tree = tree record.name = name record.headers = labels_strip_header.split() record.sequences = ['' for _ in record.headers] record._update() records.append(record) collection = SequenceCollection(records=records, get_distances=False, gtp_path=os.environ['GTP_PATH']) collection.put_distance_matrices('rf') T = \ collection.Clustering.run_spectral_rotate(collection.distance_matrices['rf' ]) collection.partitions[T] = Partition(T) collection.clusters_to_partitions[('rf', 'spectral_rotate', max(T))] = T collection.concatenate_records() cluster_recs = collection.get_cluster_records() number_of_clusters = len(cluster_recs) for j in range(number_of_clusters): record = cluster_recs[j] record_dv = record.dv[0] labels = record.dv[1] # Write some temp files from our concatenated record # as input for tree collection - # ..._dv.txt = concatenated distance matrices # ..._map.txt = updated genome map - may have gained new # species in the concatenation, also labels
def get_partitions(images, checksums, logger):
    # type: (VirtualCat, List[str], logging.Logger) -> List[Partition]
    """Identify the flash layout of ``images`` and return its partitions.

    The layout is detected by peeking at the magic number at the current
    position of ``images`` and constructing the matching partition objects
    in order: u-boot, env, then either the legacy kernel + rootfs pair or
    one or two FIT images, and finally a trailing ``data0`` partition when
    ``images.images`` is not empty.

    Args:
        images: Source of image data; must provide ``peek()`` and an
            ``images`` attribute. It is handed to every partition
            constructor.
        checksums: Passed through to ``ExternalChecksumPartition`` for the
            u-boot partition.
        logger: Logger used for error reporting and passed to every
            partition constructor.

    Returns:
        The list of partition objects in the order they were detected.

    Raises:
        SystemExit: With status 1 when the magic at offset 0 or at offset
            0x80000 is not recognized (after logging an error).
    """
    # NOTE(review): the first two positional constructor arguments look like
    # (size or list of candidate sizes, start offset) -- confirm against the
    # partition classes.
    partitions = []  # type: List[Partition]

    next_magic = images.peek()
    # First 384K is u-boot for legacy or regular-fit images OR
    # the combination of SPL + recovery u-boot. Treat them as the same.
    if next_magic in ExternalChecksumPartition.UBootMagics:
        partitions.append(
            ExternalChecksumPartition(
                0x060000, 0x000000, 'u-boot', images, checksums, logger))
    else:
        logger.error('Unrecognized magic 0x{:x} at offset 0x{:x}.'.format(
            next_magic, 0))
        sys.exit(1)

    # Env is always in the same location for both legacy and FIT images.
    partitions.append(
        EnvironmentPartition(0x020000, 0x060000, 'env', images, logger))

    # Either we are using the legacy image format or the FIT format.
    next_magic = images.peek()
    if next_magic == LegacyUBootPartition.magic:
        partitions.append(
            LegacyUBootPartition(
                [0x280000, 0x0400000],
                0x080000,
                'kernel',
                images,
                logger,
                LegacyUBootPartition.magic,
            ))
        # rootfs starts wherever the kernel partition ended.
        partitions.append(
            LegacyUBootPartition(
                [0xc00000, 0x1780000],
                partitions[-1].end(),
                'rootfs',
                images,
                logger,
            ))
    elif next_magic == DeviceTreePartition.magic:
        # The FIT image at 0x80000 could be a u-boot image (size 0x60000)
        # or the kernel+rootfs FIT which is much larger.
        # DeviceTreePartition() will pick the smallest which fits.
        part = DeviceTreePartition(
            [0x60000, 0x1B200000], 0x80000, "fit1", images, logger)
        partitions.append(part)

        # If the end of the above partition is 0xE0000 then we need to
        # check a second FIT image. This is definitely the larger one.
        if part.end() == 0xE0000:
            partitions.append(
                DeviceTreePartition(
                    [0x1B200000], 0xE0000, "fit2", images, logger))
    else:
        # Report through the injected logger (not the root ``logging``
        # module) so this message reaches the same handlers as the
        # offset-0 error above.
        logger.error('Unrecognized magic 0x{:x} at offset 0x{:x}.'.format(
            next_magic, 0x80000))
        sys.exit(1)

    if images.images != []:
        # TODO data0 missing is only okay for ImageFiles, not
        # MemoryTechnologyDevices. Also, this omits data0 from mtdparts=
        # message.
        # data0 begins at the end of the last partition; its first argument
        # is 0x2000000 minus that end offset.
        partitions.append(
            Partition(
                0x2000000 - partitions[-1].end(),
                partitions[-1].end(),
                'data0',
                images,
                logger,
            ))

    return partitions