예제 #1
0
from partition import Partition
from tag import Tag

if __name__ == '__main__':
    # Demo driver: build partitioners, score one of them, then build a
    # tagger and run it on a segmented sample sentence.

    # Built from the small corpus; the instance is not used again below.
    # NOTE(review): presumably construction trains a model as a side
    # effect (the second instance passes trainNow=False) -- confirm.
    mini_partition = Partition(corpus='./dataset/mini.txt')

    # Second partitioner is given pre-built 1-gram / 2-gram model paths
    # and trainNow=False.
    pretrained_partition = Partition(
        trainNow=False,
        model_1gram_path='./model/2014_corpus_train.1gram',
        model_2gram_path='./model/2014_corpus_train.2gram',
    )
    pretrained_partition.evaluate('./dataset/2014_corpus_test1.txt')

    # Tagger is built from the training corpus, fed the segmentation of a
    # sample sentence, and finally evaluated on the test set.
    tagger = Tag(corpus='./dataset/2014_corpus_train.txt')
    segmented_sample = pretrained_partition.segment("今天是个好日子。")
    tagger.tag(segmented_sample)
    tagger.evaluate(testset_path='./dataset/2014_corpus_test.txt')
예제 #2
0
    record = TCSeqRec()
    record.dv = [(dv_matrix_strip_header, labels_strip_header)]
    record.tree = tree
    record.name = name
    record.headers = labels_strip_header.split()
    record.sequences = ['' for _ in record.headers]
    record._update()
    records.append(record)

# Wrap the records collected above in a single SequenceCollection.
# get_distances=False is passed at construction; the distance matrices
# are requested explicitly on the next line instead.
# The GTP_PATH environment variable must be set: os.environ[...] raises
# KeyError when it is missing.
collection = SequenceCollection(records=records, get_distances=False,
                                gtp_path=os.environ['GTP_PATH'])
# NOTE(review): 'rf' presumably selects the Robinson-Foulds tree metric
# -- confirm against SequenceCollection.
collection.put_distance_matrices('rf')
# Cluster using the 'rf' distance matrix; the result is kept in T.
T = \
    collection.Clustering.run_spectral_rotate(collection.distance_matrices['rf'
        ])
# T is used as a dictionary key here, so it must be hashable.
collection.partitions[T] = Partition(T)
# The lookup key is (metric, method, max(T)).
# NOTE(review): max(T) presumably stands for the number of clusters --
# verify against how run_spectral_rotate labels its clusters.
collection.clusters_to_partitions[('rf', 'spectral_rotate', max(T))] = T
# Concatenate the records, then fetch the per-cluster records.
# NOTE(review): presumably one concatenated record per cluster -- confirm.
collection.concatenate_records()
cluster_recs = collection.get_cluster_records()

number_of_clusters = len(cluster_recs)
for j in range(number_of_clusters):
    record = cluster_recs[j]
    record_dv = record.dv[0]
    labels = record.dv[1]

    # Write some temp files from our concatenated record
    # as input for tree collection -
    # ..._dv.txt     = concatenated distance matrices
    # ..._map.txt    = updated genome map - may have gained new
    #                  species in the concatenation, also labels
예제 #3
0
def get_partitions(images, checksums, logger):
    # type: (VirtualCat, List[str], logging.Logger) -> List[Partition]
    """Build the ordered list of flash partitions described by ``images``.

    The layout is chosen by peeking at the magic number at two points:
    once at the start (u-boot) and once after the environment partition
    (legacy kernel/rootfs versus FIT image(s)).

    Args:
        images: Image source. Must provide ``peek()`` returning the next
            magic number and an ``images`` attribute; it is handed to
            every partition constructor.
        checksums: Passed through to the u-boot
            ``ExternalChecksumPartition``.
        logger: Logger used for error reporting here and handed to every
            partition constructor.

    Returns:
        The partitions in the order they were recognised.

    Note:
        When a magic number is not recognised, an error is logged through
        ``logger`` and the process terminates via ``sys.exit(1)``.
    """
    partitions = []  # type: List[Partition]
    next_magic = images.peek()
    # First 384K is u-boot for legacy or regular-fit images OR
    # the combination of SPL + recovery u-boot. Treat them as the same.
    if next_magic in ExternalChecksumPartition.UBootMagics:
        partitions.append(
            ExternalChecksumPartition(0x060000, 0x000000, 'u-boot', images,
                                      checksums, logger))
    else:
        logger.error('Unrecognized magic 0x{:x} at offset 0x{:x}.'.format(
            next_magic, 0))
        sys.exit(1)

    # Env is always in the same location for both legacy and FIT images.
    partitions.append(
        EnvironmentPartition(0x020000, 0x060000, 'env', images, logger))

    # Either we are using the legacy image format or the FIT format.
    # NOTE(review): peek() is called again without an explicit seek, so the
    # partition constructors above presumably advance ``images`` -- confirm.
    next_magic = images.peek()
    if next_magic == LegacyUBootPartition.magic:
        partitions.append(
            LegacyUBootPartition([0x280000, 0x0400000], 0x080000, 'kernel',
                                 images, logger, LegacyUBootPartition.magic))
        # rootfs starts wherever the kernel partition ended.
        partitions.append(
            LegacyUBootPartition(
                [0xc00000, 0x1780000],
                partitions[-1].end(),
                'rootfs',
                images,
                logger,
            ))
    elif next_magic == DeviceTreePartition.magic:
        # The FIT image at 0x80000 could be a u-boot image (size 0x60000)
        # or the kernel+rootfs FIT which is much larger.
        # DeviceTreePartition() will pick the smallest which fits.
        part = DeviceTreePartition([0x60000, 0x1B200000], 0x80000, "fit1",
                                   images, logger)
        partitions.append(part)

        # If the end of the above partition is 0xE0000 then we need to
        # check a second FIT image. This is definitely the larger one.
        if part.end() == 0xE0000:
            partitions.append(
                DeviceTreePartition([0x1B200000], 0xE0000, "fit2", images,
                                    logger))
    else:
        # Report through the injected logger (not the root ``logging``
        # module) so this message reaches the same handlers as the
        # first unrecognized-magic error above.
        logger.error('Unrecognized magic 0x{:x} at offset 0x{:x}.'.format(
            next_magic, 0x80000))
        sys.exit(1)
    if images.images != []:
        # TODO data0 missing is only okay for ImageFiles, not
        # MemoryTechnologyDevices.  Also, this omits data0 from mtdparts=
        # message.
        # data0 runs from the end of the last partition up to 0x2000000.
        partitions.append(
            Partition(
                0x2000000 - partitions[-1].end(),
                partitions[-1].end(),
                'data0',
                images,
                logger,
            ))
    return partitions