Exemple #1
0

def ll2s(x):
    """Collect the first element of every entry of *x* into a set.

    *x* is an iterable of indexable entries, e.g.
    [['guid1', 2], ['guid2', 0]], for which the result is
    {'guid1', 'guid2'}.  Repeated first elements collapse into one member,
    and an empty input yields an empty set.
    """
    return {entry[0] for entry in x}


# Relative paths used by this demo: fastadir is where the fasta files are read
# from; outputdir is presumably where results are written (not used in the
# part of the script visible here).
fastadir = os.path.join('..', 'demos', 'AA041', 'fasta')
outputdir = os.path.join('..', 'demos', 'AA041', 'output')

# instantiate client
fn3c = fn3Client(
)  # expects operation on local host; pass baseurl if somewhere else.

# names of the clustering algorithms
clusters = fn3c.clustering()

# Guids reported by the client at start-up, held as a set so that the
# membership test applied to each control guid further down is cheap.
existing_guids = set(fn3c.guids())
# Flag initialised to False here; presumably updated later in the script.
clustering_created = False
print("There are {0} existing guids".format(len(existing_guids)))
# Add control fasta files.  The system evaluates the %N relative to the existing population.
# We load 50 randomly selected guids as controls.

for i, fastafile in enumerate(
        glob.glob(os.path.join(fastadir, 'control', '*.fasta'))):
    guid = "ctrl_" + os.path.basename(fastafile).replace('.fasta', '')
    seq = fn3c.read_fasta_file(fastafile)['seq']
    if not guid in existing_guids:
Exemple #2
0
    p.mkdir(parents=True, exist_ok=True)
    p = pathlib.Path(inputdir)
    p.mkdir(parents=True, exist_ok=True)

    # determine input files
    inputfiles = glob.glob(os.path.join(inputdir, '*.fasta'))
    random.shuffle(inputfiles)  # randomise the order, so the slice taken below is a random subset
    if len(inputfiles) < max_sequences:
        raise ValueError(
            "Asked to add {0} sequences, but only {1} are available in the input directory {2}"
            .format(max_sequences, len(inputfiles), inputdir))
    else:
        inputfiles = inputfiles[0:max_sequences]

    print("opening connection to fn3 server")
    fn3c = fn3Client(baseurl="http://127.0.0.1:5020")

    # determine all masked positions
    excluded_positions = fn3c.nucleotides_excluded()

    # determine how many samples there are currently in the server.
    nSamples = len(fn3c.guids())
    print("There are {0} existing samples.  Adding more ..".format(nSamples))

    # create output file with header line
    outputfile = os.path.join(outputdir, 'timings_{0}.tsv'.format(nSamples))
    nAdded_this_batch = 0
    with open(outputfile, 'w+t') as f:
        output_line = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(
            'nSamples', 's_insert', 'e_insert', 'd_insert', 's_read', 'e_read',
            'd_read')
Example usage:

# first, a server must be running
python findNeighbour3-server.py ../demos/simulation/config/config.json
# then simulations must be generated (e.g. with run_simulation)
python run_simulation.py  ../output/simulation_set_1""")

    parser.add_argument('inputdir',
                        type=str,
                        nargs=1,
                        help='data will be read from the inputdir')
    args = parser.parse_args()
    basedir = os.path.abspath(args.inputdir[0])

    # connect to server
    fn3c = fn3Client("http://localhost:5020")

    # iterate over simulated data
    for inputdir in glob.glob(os.path.join(basedir, '*')):
        print(inputdir)

        # define filenames
        fasta_filename = os.path.join(inputdir, 'phylogeny.fasta')
        sequence_filename = os.path.join(inputdir, 'phylogeny.txt')
        observed_filename = os.path.join(inputdir, 'observed.txt')
        tree_filename = os.path.join(inputdir, 'phylogeny.nwk')
        ref_filename = os.path.join(inputdir, 'reference.fasta')
        treepic_filename = os.path.join(inputdir,
                                        '{0}.png'.format('tree_image'))
        annotated_treepic_filename = os.path.join(
            inputdir, '{0}.png'.format('annotated_tree_image'))
Exemple #4
0
    def __init__(self):
        """Create an fn3Client with default arguments and keep it as self.fn3c."""

        self.fn3c = fn3Client()  # expect success