Example #1
def LoadMostRecentModel(historical_snapshots_folder):
    print("Looking in", os.path.abspath(historical_snapshots_folder))

    subfolders = GetSubfolders(historical_snapshots_folder)
    if len(subfolders) == 0:
        return None
    most_recent_subfolder = natsort.natsorted(subfolders)[-1]
    filenames = glob.glob(os.path.join(most_recent_subfolder, "*"))
    most_recent_filename = natsort.natsorted(filenames)[-1]
    return most_recent_filename
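
Note (not part of the example above): a minimal sketch, with made-up folder names, of why natsort.natsorted is used to pick the latest snapshot. Built-in sorted() orders numbered names lexicographically, while natsorted orders them by their embedded numbers, so indexing with [-1] really does give the most recent one.

import natsort

# Made-up snapshot folder names with embedded numbers.
subfolders = ["snapshot_2", "snapshot_10", "snapshot_1"]

print(sorted(subfolders))             # ['snapshot_1', 'snapshot_10', 'snapshot_2']
print(natsort.natsorted(subfolders))  # ['snapshot_1', 'snapshot_2', 'snapshot_10']

# The last element of the natural ordering is the highest-numbered snapshot.
most_recent = natsort.natsorted(subfolders)[-1]
print(most_recent)                    # snapshot_10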
Example #2
 def generate_max_intensity_proj(self, image_folder_path, output_path):
     files = os.listdir(image_folder_path)
     # natsorted returns a new list; assign it so the files are processed in order
     files = natsort.natsorted(files)
     first_iteration = True
     max_index = len(files)
     for index in range(max_index):
         file = files[index]
         filepath = tools.make_path(image_folder_path, file)
         current_image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
         if first_iteration:
             max_projection = current_image
             first_iteration = False
         else:
             max_projection=np.maximum(max_projection, current_image)
     self.data_tracker.save_and_show(image=max_projection, filename=MAX_INTENSITY_PROJ_FILENAME,
                                     caption="Max intensity projection")
     return max_projection
Example #3
def _703_prepare_data_splits():
    """
    Sample frame paths for the i3d model.
    :return:
    """

    annot_dict_path = Pth(
        'EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth(
        'EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    video_names_splits_path = Pth(
        'EPIC-Kitchens/annotations/video_names_splits.pkl')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # split_ratio
    split_ratio = 0.8
    person_videos_dict = {}

    # first loop to collect all unique video ids
    for annot_id in annot_idxes_many_shots:
        annot_line = annot_dict[annot_id]
        person_id = annot_line[0]
        video_id = annot_line[1]
        if person_id not in person_videos_dict:
            person_videos_dict[person_id] = []

        person_videos_dict[person_id].append(video_id)

    for person_id in person_videos_dict:
        video_names = natsort.natsorted(
            np.unique(person_videos_dict[person_id]))
        person_videos_dict[person_id] = video_names

    # now that we have collected the persons and their videos, see how many videos fall into each split
    video_names_tr = []
    video_names_te = []

    for person_id in person_videos_dict:
        v_names = person_videos_dict[person_id]
        idx = int(len(v_names) * split_ratio)
        v_names_tr = v_names[:idx]
        v_names_te = v_names[idx:]
        video_names_tr += v_names_tr
        video_names_te += v_names_te

    video_names_tr = np.array(video_names_tr)
    video_names_te = np.array(video_names_te)

    print(len(video_names_tr) + len(video_names_te))
    print(len(video_names_tr))
    print(len(video_names_te))

    # save video names
    utils.pkl_dump((video_names_tr, video_names_te), video_names_splits_path)
Example #4
def Run_Params(params):
    print("Running params")
    # Setup log
    log = open(params.log_file_path, "a")
    print("first log")
    Log_Initialize(params, log)

    for epoch in range(params.epochs):
        log_print("epoch:", epoch, log=log)

        # training
        for data_file in natsorted(os.listdir(params.train_data_dir)):
            log_print("Training file:", data_file, log=log)
            log.close()

            params.data_file = data_file
            if test_epoch:
                train(params)
            else:
                with concurrent.futures.ProcessPoolExecutor() as executor:
                    future = executor.submit(train, params)
                    executor.shutdown(wait=True)
            log = open(params.log_file_path, "a")
            log_print("File", data_file, "is Done", log=log)

        # Validation
        for data_file in natsorted(os.listdir(params.val_data_dir)):
            log_print("Validation file:", data_file, log=log)
            log.close()

            params.data_file = data_file
            with concurrent.futures.ProcessPoolExecutor() as executor:
                future = executor.submit(val, params)
                executor.shutdown(wait=True)
            log = open(params.log_file_path, "a")
            log_print("File", data_file, "is Done", log=log)

    return "Done"
Example #5
def sort_and_print_entries(entries, args):
    """Sort the entries, applying the filters first if necessary."""

    # Extract the proper number type.
    num_type = {
        'digit': None,
        'version': None,
        'ver': None,
        'int': int,
        'float': float
    }[args.number_type]
    unsigned = not args.signed or num_type is None
    alg = (ns.INT * int(num_type in (int, None)) | ns.UNSIGNED * unsigned
           | ns.NOEXP * (not args.exp) | ns.PATH * args.paths
           | ns.LOCALE * args.locale)

    # Pre-remove entries that don't pass the filtering criteria
    # Make sure we use the same searching algorithm for filtering
    # as for sorting.
    do_filter = args.filter is not None or args.reverse_filter is not None
    if do_filter or args.exclude:
        inp_options = (ns.INT * int(num_type in (int, None))
                       | ns.UNSIGNED * unsigned | ns.NOEXP * (not args.exp),
                       '.')
        regex, num_function = _regex_and_num_function_chooser[inp_options]
        if args.filter is not None:
            lows, highs = ([f[0]
                            for f in args.filter], [f[1] for f in args.filter])
            entries = [
                entry for entry in entries
                if keep_entry_range(entry, lows, highs, num_function, regex)
            ]
        if args.reverse_filter is not None:
            lows, highs = ([f[0] for f in args.reverse_filter],
                           [f[1] for f in args.reverse_filter])
            entries = [
                entry for entry in entries if
                not keep_entry_range(entry, lows, highs, num_function, regex)
            ]
        if args.exclude:
            exclude = set(args.exclude)
            entries = [
                entry for entry in entries
                if exclude_entry(entry, exclude, num_function, regex)
            ]

    # Print off the sorted results
    for entry in natsorted(entries, reverse=args.reverse, alg=alg):
        print(entry)
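
Note (not part of the example above): a small sketch of the ns-flag idiom used in sort_and_print_entries. Each ns flag is multiplied by a boolean, which keeps the flag (True) or zeroes it out (False), and the results are OR'ed into a single alg value; the input strings below are invented.

from natsort import natsorted, ns

use_floats = True
signed = False
alg = ns.FLOAT * use_floats | ns.SIGNED * signed | ns.NOEXP * True

# With ns.FLOAT active, the suffixes compare as floats (1.125 < 1.25 < 1.5)
# rather than as the integer runs 5 < 25 < 125.
print(natsorted(['v1.5', 'v1.25', 'v1.125'], alg=alg))
# ['v1.125', 'v1.25', 'v1.5']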
Example #6
File: tasks.py Project: ti250/cdeweb
def get_result(f, fname):
    try:
        document = Document.from_file(f, fname=fname)
    except Exception:
        return {}
    records = document.records.serialize()
    # Sort by the first label; records without labels fall back to a 'ZZZ…' key
    # so they sort last, with records that have more names sorting earlier.
    records = natsort.natsorted(
        records,
        key=lambda x: x.get('labels', ['ZZZ%s' %
                                       (99 - len(x.get('names', [])))])[0])
    result = {
        'records': records,
        'abbreviations': document.abbreviation_definitions
    }
    return result
Example #7
File: render.py Project: tnk3r/pyVideoHub
def make_table_label_cur(optlist, optname, sel=0):
    TABLE_LABEL_CUR='''
    <select class="" name="###NAME###">
      ###OPTION###
    </select>
    '''
    opt=''
    for k in natsorted(optlist.keys()):
        v = optlist[k]
        if int(k) == sel:
            opt+= '<option selected="selected">'
        else:
            opt+= '<option>'
        opt+= '%d: %s' % (int(k)+1, v)
        opt+= '</option>'
    tmp=TABLE_LABEL_CUR.replace('###NAME###', optname)
    return tmp.replace('###OPTION###', opt)
Example #8
def sort_and_print_entries(entries, args):
    """Sort the entries, applying the filters first if necessary."""

    # Extract the proper number type.
    is_float = args.number_type in ('float', 'real', 'f', 'r')
    signed = args.signed or args.number_type in ('real', 'r')
    alg = (ns.FLOAT * is_float |
           ns.SIGNED * signed |
           ns.NOEXP * (not args.exp) |
           ns.PATH * args.paths |
           ns.LOCALE * args.locale)

    # Pre-remove entries that don't pass the filtering criteria
    # Make sure we use the same searching algorithm for filtering
    # as for sorting.
    do_filter = args.filter is not None or args.reverse_filter is not None
    if do_filter or args.exclude:
        inp_options = (ns.FLOAT * is_float |
                       ns.SIGNED * signed |
                       ns.NOEXP * (not args.exp),
                       '.'
                       )
        regex, num_function = _regex_and_num_function_chooser[inp_options]
        if args.filter is not None:
            lows, highs = ([f[0] for f in args.filter],
                           [f[1] for f in args.filter])
            entries = [entry for entry in entries
                       if keep_entry_range(entry, lows, highs,
                                           num_function, regex)]
        if args.reverse_filter is not None:
            lows, highs = ([f[0] for f in args.reverse_filter],
                           [f[1] for f in args.reverse_filter])
            entries = [entry for entry in entries
                       if not keep_entry_range(entry, lows, highs,
                                               num_function, regex)]
        if args.exclude:
            exclude = set(args.exclude)
            entries = [entry for entry in entries
                       if exclude_entry(entry, exclude,
                                        num_function, regex)]

    # Print off the sorted results
    for entry in natsorted(entries, reverse=args.reverse, alg=alg):
        print(entry)
Example #9
 def get_data(self):
     data_list = []
     data_list = os.listdir(self.root)
     data_list = natsort.natsorted(data_list)
     data_num = int(data_list[-1].split('.')[0]) + 1
     
     df = pd.read_csv(configs.train_label_dir, sep='\t', header=None)
     df.columns = ['mean', 'std', 'j1', 'j2', 'j3']
     label = df.loc[:data_num - 1, 'std'].to_numpy().reshape(-1, 1)
     
     # use original data
     train_data_num = math.floor(len(data_list) * 0.8)
     if(self.train == 0):
         # return data_list[:train_data_num], label[:train_data_num]
         return data_list, label
     elif(self.train == 1):
         return data_list[train_data_num:], label[train_data_num:]
     elif(self.train == 2):
         return data_list
Example #10
def LoadModels(historical_snapshots_folder, backbone, day_number=None):
    print("Started dual-memory modelling, looking in",
          historical_snapshots_folder)

    # If there is no "most_recent_snapshot", return None:

    most_recent_snapshot = LoadMostRecentModel(historical_snapshots_folder)
    if most_recent_snapshot is None:
        return None

    print("Most recent snapshot:", most_recent_snapshot)

    # Search folder for a Day-10 snapshot:
    # f"historical_snapshots/Day{day_number}/snapshots/"

    if day_number is None:
        match = re.search(r"Day(\d+)", most_recent_snapshot)
        if match is None:
            raise ValueError("Filename doesn't conform to standard")

        day_number = int(match.group(1))
    print("Day number is:", day_number)

    if day_number > 10:
        print("Day number is greater than 10")
        find_day = day_number - 10
        folder = os.path.join(historical_snapshots_folder, f"Day{find_day}/")
        filenames = glob.glob(os.path.join(folder, "*"))
        combine_model_filename = natsort.natsorted(filenames)[-1]
        print("Done")

        # load and combine models:
        # models.load_model(model_filename, backbone_name=args.backbone)
        all_models = [
            models.load_model(most_recent_snapshot, backbone_name=backbone),
            models.load_model(combine_model_filename, backbone_name=backbone)
        ]

        return all_models
    else:
        return [
            models.load_model(most_recent_snapshot, backbone_name=backbone)
        ]
Example #11
def sort_and_print_entries(entries, args):
    """Sort the entries, applying the filters first if necessary."""

    # Extract the proper number type.
    is_float = args.number_type in ('float', 'real', 'f', 'r')
    signed = args.signed or args.number_type in ('real', 'r')
    alg = (ns.FLOAT * is_float | ns.SIGNED * signed | ns.NOEXP * (not args.exp)
           | ns.PATH * args.paths | ns.LOCALE * args.locale)

    # Pre-remove entries that don't pass the filtering criteria
    # Make sure we use the same searching algorithm for filtering
    # as for sorting.
    do_filter = args.filter is not None or args.reverse_filter is not None
    if do_filter or args.exclude:
        inp_options = (ns.FLOAT * is_float | ns.SIGNED * signed | ns.NOEXP *
                       (not args.exp))
        regex = _regex_chooser[inp_options]
        if args.filter is not None:
            lows, highs = ([f[0]
                            for f in args.filter], [f[1] for f in args.filter])
            entries = [
                entry for entry in entries
                if keep_entry_range(entry, lows, highs, float, regex)
            ]
        if args.reverse_filter is not None:
            lows, highs = ([f[0] for f in args.reverse_filter],
                           [f[1] for f in args.reverse_filter])
            entries = [
                entry for entry in entries
                if not keep_entry_range(entry, lows, highs, float, regex)
            ]
        if args.exclude:
            exclude = set(args.exclude)
            entries = [
                entry for entry in entries
                if exclude_entry(entry, exclude, float, regex)
            ]

    # Print off the sorted results
    for entry in natsorted(entries, reverse=args.reverse, alg=alg):
        print(entry)
Example #12
File: pre_process.py Project: cannonja/jc2
    #PSU machines (linux lab)
    base1 = os.path.expanduser('~/dev/jc2')
    base2 = os.path.expanduser('~/Desktop')
    sys.path.append(os.path.join(base1, 'Projects/net'))
    file_path = base2 + '/kaggle/ultrasound'
    train_path = file_path + '/data/train'
    im_path = train_path + '.csv' 
    mask_path = file_path + '/data/train_mask.csv'

from feed_forward import ff_net

################################## Read training images and masks ####################################################

'''Image 19_8.tif appears to not have a mask and 19_9 seems to be missing'''
os.chdir(train_path)
train_all = natsort.natsorted(os.listdir())
train_im = train_all[slice(0,len(train_all),2)]
train_mask = train_all[slice(1,len(train_all),2)]
train_final = [(i, j) for i, j in zip(train_im, train_mask)]

im_dim = misc.imread(train_im[0]).shape
ims = np.zeros((np.prod(im_dim), len(train_im)))
for i in range(len(train_im)):
    ims[:, i] = misc.imread(train_im[i]).flatten()
np.savetxt(im_path, ims, delimiter=',')


masks = np.zeros((np.prod(im_dim), len(train_mask)))
for i in range(len(train_mask)):
    masks[:, i] = misc.imread(train_mask[i]).flatten()
np.savetxt(mask_path, masks, delimiter=',')
Example #13

if __name__=="__main__":
    parser = argparse.ArgumentParser()
    
    parser.add_argument('--seed', type=int, default=0, help="The random seed.")
    parser.add_argument('--raw_dir', type=str, required=True, help="The directory which contains the raw xml files.")
    parser.add_argument('--data_dir', type=str, default="data/opensubtitles-parsed", help="The parent directory for saving parsed data.")
    parser.add_argument('--bert_ckpt', type=str, default="bert-base-uncased", help="The checkpoint of the BERT to load the tokenizer.")
    parser.add_argument('--lam', type=int, default=2, help="The lambda value for the Poisson distribution.")
    parser.add_argument('--num_trunc', type=int, default=20, help="The number of turns to truncate.")

    args = parser.parse_args()
    
    file_list = glob(f"{args.raw_dir}/xml/en/*/*/*.xml")
    file_list = natsort.natsorted(file_list)
    print(file_list)
    print(f"The total number of files: {len(file_list)}")
    
    if not os.path.isdir(args.data_dir):
        os.makedirs(args.data_dir)
        
    # Load the tokenizer
    config = BertConfig.from_pretrained(args.bert_ckpt)
    tokenizer = BertTokenizer.from_pretrained(args.bert_ckpt)
    vocab = tokenizer.get_vocab()
    args.cls_id = vocab[tokenizer.cls_token]
    args.sep_id = vocab[tokenizer.sep_token]
    args.max_len = config.max_position_embeddings
    
    random.seed(args.seed)
Example #14
import imageio
from os import listdir
from os.path import isfile, join

from natsort import natsort

images = []

PATH_TO_GIF = 'color-reduction.gif'
PATH_TO_IMAGES = "Images/"


filenames= [f for f in listdir(PATH_TO_IMAGES) if isfile(join(PATH_TO_IMAGES, f))]



filenames = natsort.natsorted(filenames,reverse=False)
print(filenames)




for filename in filenames:
    images.append(imageio.imread("Images/" + filename))
imageio.mimsave(PATH_TO_GIF, images, duration=1.5)
Example #15
def get_img_paths(dir_path, wildcard='*.png'):
    return natsort.natsorted(glob.glob(dir_path + '/' + wildcard))
Example #16
import os
import json
from natsort.natsort import natsorted

result = []

for path, dirnames, filenames in os.walk('.'):
    json_file_list = natsorted(
        [file for file in filenames if file.endswith('数.json')])

    for json_file in json_file_list:
        # join with the walked path so files in subdirectories can be opened
        with open(os.path.join(path, json_file), 'r', encoding='utf8') as f:
            dic = json.load(f)
            dic = {'title': json_file, **dic}
            result.append(dic)

with open('合计结果.json', 'w', encoding='utf8') as f:
    f.write(json.dumps(result, ensure_ascii=False))
Example #17
def extractChromosomes(samtools, normal, tumors, reference=None):

    # Read the names of sequences in normal BAM file
    normal_sq = getSQNames(samtools, normal[0])

    # Extract only the names of chromosomes in standard formats
    '''
    Adding chromosome names to two different variables, checking if they exist, and then adding them to a third
    variable is redundant.
    chrm = set()
    no_chrm = set()
    '''

    chromosomes = set()
    for i in range(1, 23):
        if str(i) in normal_sq:
            # no_chrm.add(str(i))
            chromosomes.add(str(i))
        elif "chr" + str(i) in normal_sq:
            # chrm.add("chr" + str(i))
            chromosomes.add("chr" + str(i))
        else:
            sys.stderr.write(
                "WARNING: a chromosome named either {} or a variant of CHR{} cannot be found in the "
                "normal BAM file\n".format(i, i))

    # if len(chrm) == 0 and len(no_chrm) == 0: raise ValueError("No chromosomes found in the normal BAM")
    if len(chromosomes) == 0:
        raise ValueError("No chromosomes found in the normal BAM")
    '''
    Adding chromosome names to two different variables, checking if they exist, and then adding them to a third
    variable is redundant.
    if len(chrm) > len(no_chrm):
        chromosomes = chrm
    else:
        chromosomes = no_chrm
    '''

    # Check that chromosomes with the same names are present in each tumor BAM file
    for tumor in tumors:
        tumor_sq = getSQNames(samtools, tumor[0])
        if not chromosomes <= tumor_sq:
            sys.stderr.write(
                "WARNING: chromosomes {} are not present in the tumor sample {}\n"
                .format(chromosomes - tumor_sq, tumor))

    # Check consistency of chromosome names with the reference
    if reference:
        refdict = "{}.dict".format(reference)
        stdout, stderr = subprocess.Popen(
            "grep -e \"^>\" {}".format(reference),
            stdout=subprocess.PIPE,
            shell=True).communicate()

        if stderr:
            raise ValueError(
                "Error in reading the reference: {}".format(reference))
        else:
            ref = set(c[1:].strip().split()[0]
                      for c in stdout.strip().split('\n'))

        if not (chromosomes <= ref):
            raise ValueError(
                "The given reference cannot be used because the chromosome names are inconsistent!\n"
                "Chromosomes found in BAF files: {}\nChromosomes with the same name found in reference "
                "genome: {}".format(chromosomes, ref))

    # return sorted(list(chromosomes), key=sp.numericOrder)
    return natsort.natsorted(list(chromosomes))
Example #18
            try:
                chromosome, coords = tsv_data["Mutation genome position"].split(":")
            except ValueError:
                # skip cosmic entries with no position data
                continue
            else:
                start, end = coords.split("-")
                tsv_data["chromosome"] = chromosome
                tsv_data["start"] = start
                tsv_data["end"] = end

                new_cosmic_lines.append(tsv_data)

        # tabix needs file to be sorted
        new_cosmic_lines.sort(key=lambda x: int(x["end"]))
        new_cosmic_lines.sort(key=lambda x: int(x["start"]))
        new_cosmic_lines = natsort.natsorted(new_cosmic_lines, key=lambda x: x["chromosome"])

        headers.extend(["chromosome", "start", "end"])
        out = open(output_fname, "w")
        out.write("#" + "\t".join(headers) + "\n")
        for tsv_data in new_cosmic_lines:
            out.write("\t".join([tsv_data[h] for h in headers]) + "\n")
        out.close()

        pysam.tabix_index(filename=output_fname, seq_col=34, start_col=35, end_col=36)

        print("Creating second index for AA position...")
        # Create a second index file by gene and start AA, endAA
        TabixIndexer.indexGeneProteinPosition("Gene name", "Mutation AA", input_fname, output_fname + ".byAA")
Example #19
def find_image_at_index(index, path):
    image_names = os.listdir(path)
    # natsorted returns a new list; assign it so indexing follows natural order
    image_names = natsort.natsorted(image_names)
    image_name = image_names[index]
    image_path = make_path(path, image_name)
    return cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
Example #20
def readBINs(normalbins, tumorbins):
    normalBINs = {}
    tumorBINs = {}
    normal = set()
    samples = set()
    normal_chr = set()
    tumor_chr = set()

    # Read normal bin counts
    with open(normalbins, 'r') as f:
        for line in f:
            parsed = line.strip().split()[:5]
            normal_chr.add(parsed[1])
            normal.add(parsed[0])
            if (parsed[1], int(parsed[2]), int(parsed[3])) not in normalBINs:
                normalBINs[parsed[1],
                           int(parsed[2]),
                           int(parsed[3])] = (parsed[0], int(parsed[4]))
            else:
                raise ValueError(
                    sp.error(
                        "Found multiple lines for the same interval in the normal bin counts!"
                    ))

    # Check normal bin counts
    if len(normal) > 1:
        raise ValueError(
            sp.error("Found multiple samples in normal bin counts!"))

    prev_r = -1
    prev_c = -1
    for key in sorted(normalBINs,
                      key=(lambda x:
                           (sp.numericOrder(x[0]), int(x[1]), int(x[2])))):
        l, r = int(key[1]), int(key[2])
        if l > r and prev_c == key[0]:
            raise ValueError(
                sp.error(
                    "Found an interval with START {} greater than END {} in normal bin counts!"
                    .format(key[1], key[2])))
        if l < prev_r and prev_c == key[0]:
            raise ValueError(
                sp.error(
                    "Found overlapping intervals one ending with {} and the next starting with {} in normal bin counts!"
                    .format(prev_r, key[1])))
        prev_r = r
        prev_c = key[0]

    # Read tumor bin counts
    with open(tumorbins, 'r') as f:
        for line in f:
            parsed = line.strip().split()[:5]
            tumor_chr.add(parsed[1])
            samples.add(parsed[0])
            try:
                tumorBINs[parsed[1], int(parsed[2]),
                          int(parsed[3])].add((parsed[0], int(parsed[4])))
            except KeyError:
                tumorBINs[parsed[1], int(parsed[2]), int(parsed[3])] = set()
                tumorBINs[parsed[1], int(parsed[2]),
                          int(parsed[3])].add((parsed[0], int(parsed[4])))

    # Check tumor bin counts
    prev_r = -1
    prev_c = -1
    num_samples = len(samples)
    for key in sorted(tumorBINs,
                      key=(lambda x:
                           (sp.numericOrder(x[0]), int(x[1]), int(x[2])))):
        l, r = int(key[1]), int(key[2])
        if len(tumorBINs[key]) != num_samples:
            raise ValueError(
                sp.error(
                    "Found multiple lines for the same interval in the tumor bin counts!"
                ))
        if l > r and prev_c == key[0]:
            raise ValueError(
                sp.error(
                    "Found an interval with START {} greater than END {} in tumor bin counts!"
                    .format(key[1], key[2])))
        if l < prev_r and prev_c == key[0]:
            raise ValueError(
                sp.error(
                    "Found overlapping intervals one ending with {} and the next starting with {} in tumor bin counts!"
                    .format(prev_r, key[1])))
        prev_r = r
        prev_c = key[0]

    if normal_chr != tumor_chr:
        raise ValueError(
            sp.error(
                "The chromosomes in normal and tumor bin counts are different!"
            ))
    if set(normalBINs) != set(tumorBINs):
        raise ValueError(
            sp.error(
                "The bins of the normal and tumor samples are different!"))

    # chromosomes = sorted(list(normal_chr), key=sp.numericOrder)
    chromosomes = natsort.natsorted(normal_chr)

    return normalBINs, tumorBINs, chromosomes, normal.pop(), samples
Example #21
import imageio
import os
from natsort import natsort


def create_gif(filenames, duration):
    images = []
    for filename in filenames:
        images.append(imageio.imread(filename))
    output_file = 'raytrace_2.gif'
    imageio.mimsave(output_file, images, duration=duration)


path = '/home/vkvishal/Documents/RayTracing/'

filenames = []
for file in os.listdir(path):
    # filename = os.fsdecode(file)
    if file.endswith('.bmp'):
        filenames.append(file)
filenames = natsort.natsorted(filenames)
create_gif(filenames, duration=5)
Example #22
def get_BMP_file_paths(BMP_files_directory_path):
    BMP_file_path_pattern = os.path.join(BMP_files_directory_path, '*.bmp')
    BMP_file_paths = natsort.natsorted(glob.glob(BMP_file_path_pattern))
    return BMP_file_paths
Example #23
def main(command_line_args=None):
    """
    Let's get this party started.
    :param command_line_args:
    """
    start_time = time.time()
    VersionDependencies.python_check()

    if not command_line_args:
        command_line_args = sys.argv

    run_start = datetime.datetime.today().strftime("%H:%M:%S %Y  %a %b %d")
    parser = argparse.ArgumentParser(
        description=
        "A package to map genomic repair scars at defined loci.\n {} v{}".
        format(__package__, __version__),
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('--options_file',
                        action='store',
                        dest='options_file',
                        required=True,
                        help='File containing program parameters.')

    # Check options file for errors and return object.
    args = error_checking(string_to_boolean(parser))

    log = Tool_Box.Logger(args)
    Tool_Box.log_environment_info(log, args, command_line_args)

    module_name = ""
    log.info("{} v{}".format(__package__, __version__))

    if args.IndelProcessing:
        file_list = []
        if args.Platform == "Illumina" or args.Platform == "Ramsden" or args.Platform == "TruSeq":
            log.info("Sending FASTQ files to FASTQ preprocessor.")

            if args.PEAR:
                file_list = pear_consensus(args, log)
                if not file_list:
                    log.error("PEAR failed.  Check logs.")
                    raise SystemExit(1)
                fastq_consensus = file_list[0]

                fq1 = FASTQ_Tools.FASTQ_Reader(fastq_consensus, log)
                fq2 = None

            else:
                fq2 = FASTQ_Tools.FASTQ_Reader(args.FASTQ2, log)
                fq1 = FASTQ_Tools.FASTQ_Reader(args.FASTQ1, log)

            sample_manifest = Tool_Box.FileParser.indices(
                log, args.SampleManifest)
            indel_processing = \
                Indel_Processing.DataProcessing(log, args, run_start, __version__,
                                                Target_Mapper.TargetMapper(log, args, sample_manifest), fq1, fq2)

            indel_processing.main_loop()

            # Compress or delete PEAR files.
            if args.PEAR and file_list:
                if args.DeleteConsensusFASTQ:
                    log.info("Deleting PEAR FASTQ Files.")
                    Tool_Box.delete(file_list)
                else:
                    log.info(
                        "Compressing {} FASTQ Files Generated by PEAR.".format(
                            len(file_list)))
                    p = pathos.multiprocessing.Pool(int(args.Spawn))
                    p.starmap(Tool_Box.compress_files,
                              zip(file_list, itertools.repeat(log)))
        else:
            log.error(
                "Only 'Illumina', 'TruSeq' or 'Ramsden' --Platform methods currently allowed."
            )
            raise SystemExit(1)

    elif not args.IndelProcessing:
        # Run frequency file Combine module
        run_start = datetime.datetime.today().strftime("%a %b %d %H:%M:%S %Y")
        log.info("Process Replicates.")
        data_dict = collections.defaultdict(list)
        file_list = [
            f for f in glob.glob("{}*ScarMapper_Frequency.txt".format(
                args.DataFiles, ))
        ]
        file_count = len(file_list)
        page_header = "# ScarMapper File Merge v{}\n# Run: {}\n# Sample Name: {}\n" \
            .format(__version__, run_start, args.SampleName)

        line_num = 0
        index_file = list(csv.reader(open(file_list[0]), delimiter='\t'))
        for line in index_file:
            if not line:
                break
            elif line_num > 3:
                page_header += "{}\n".format(line[0])

            line_num += 1
        page_header += "\n\n"

        for file_name in file_list:
            freq_file_data = Tool_Box.FileParser.indices(log, file_name)

            for row in freq_file_data:
                key = "{}|{}|{}|{}".format(row[3], row[4], row[6], row[8])
                row_data = row[2:]

                if key in data_dict:
                    data_dict[key][0].append(float(row[1]))
                else:
                    data_dict[key] = [[float(row[1])], row_data]

        # Process Data and Write Combined Frequency results file

        plot_data_dict = collections.defaultdict(list)
        label_dict = collections.defaultdict(float)
        output_data_dict = collections.defaultdict(list)
        marker_list = []

        for key, row_list in data_dict.items():
            # Force pattern to be in at least half of the files.
            if len(row_list[0]) / file_count >= 0.5:
                row_string = "\t".join(row_list[1])
                freq = gmean(row_list[0])
                sem = stats.sem(row_list[0])
                freq_results_outstring = "{}\t{}\t{}\n".format(
                    freq, sem, row_string)
                output_key = freq

                # Freq is a 17-digit float, so duplicates are very unlikely; if one occurs,
                # increment it by a small amount and check uniqueness again.
                if output_key in output_data_dict:
                    output_key = output_key + 1e-16
                    if output_key in output_data_dict:
                        output_key = output_key + 1e-16

                scar_type = row_list[1][0]
                label_dict[scar_type] += freq

                # Gather up our data for plotting
                lft_del = int(row_list[1][1])
                rt_del = int(row_list[1][2])
                mh_size = int(row_list[1][5])
                ins_size = int(row_list[1][7])

                output_data_dict[output_key] = \
                    [(freq, lft_del, rt_del, mh_size, ins_size, scar_type), freq_results_outstring]

        freq_results_outstring = \
            "{}# Frequency\tSEM\tScar Type\tLeft Deletions\tRight Deletions\tDeletion Size\tMicrohomology\t" \
            "Microhomology Size\tInsertion\tInsertion Size\tLeft Template\tRight Template\tConsensus Left Junction\t" \
            "Consensus Right Junction\tTarget Left Junction\tTarget Right Junction\tConsensus\tTarget Region\n" \
            .format(page_header)

        # Now draw a pretty graph of the data if we are not dealing with a negative control.
        for k in natsort.natsorted(output_data_dict, reverse=True):
            data_list = output_data_dict[k]
            freq_results_outstring += data_list[1]

            freq = data_list[0][0]
            lft_del = data_list[0][1]
            rt_del = data_list[0][2]
            mh_size = data_list[0][3]
            ins_size = data_list[0][4]
            scar_type = data_list[0][5]

            # Plotting all scar patterns is messy.  This provides a cutoff.
            if freq < 0.00025:
                continue

            y_value = freq * 0.5
            lft_ins_width = freq
            rt_ins_width = freq

            # This is gathered up to find the largest value.  Used to set the x-axis limits.
            marker_list.extend([
                lft_del + (mh_size * 0.5), rt_del + (mh_size * 0.5), ins_size
            ])

            # Deletion size includes half the size of any microhomology present.
            lft_del_plot_value = (lft_del + (mh_size * 0.5)) * -1
            rt_del_plot_value = rt_del + (mh_size * 0.5)

            # Insertions are centered on 0 so we need to take half the value for each side.
            lft_ins_plot_value = (ins_size * 0.5) * -1
            rt_ins_plot_value = ins_size * 0.5

            # Scale the width of bars for insertions inside of deletions
            if lft_del + (mh_size * 0.5) != 0:
                lft_ins_width = freq * 0.5
            if rt_del + (mh_size * 0.5) != 0:
                rt_ins_width = freq * 0.5

            if scar_type not in plot_data_dict:
                plot_data_dict[scar_type] = \
                    [[freq], [lft_del_plot_value], [rt_del_plot_value], [lft_ins_plot_value],
                     [rt_ins_plot_value], [lft_ins_width], [rt_ins_width], [y_value]]
            else:
                # Get some previous plot data
                count = len(plot_data_dict[scar_type][0])
                previous_freq = plot_data_dict[scar_type][0][count - 1]
                previous_y = plot_data_dict[scar_type][7][count - 1]

                plot_data_dict[scar_type][0].append(freq)
                plot_data_dict[scar_type][1].append(lft_del_plot_value)
                plot_data_dict[scar_type][2].append(rt_del_plot_value)
                plot_data_dict[scar_type][3].append(lft_ins_plot_value)
                plot_data_dict[scar_type][4].append(rt_ins_plot_value)
                plot_data_dict[scar_type][5].append(lft_ins_width)
                plot_data_dict[scar_type][6].append(rt_ins_width)

                # Use the previous plot data to find the y-value of the current bar.
                plot_data_dict[scar_type][7] \
                    .append(previous_y + 0.002 + (0.5 * previous_freq) + y_value)

        plot_data_dict['Marker'] = [(max(marker_list)) * -1, max(marker_list)]
        # sample_name = "{}.{}".format(args.Job_Name, args.SampleName)

        ScarMapperPlot.scarmapperplot(args,
                                      datafile=None,
                                      sample_name=args.SampleName,
                                      plot_data_dict=plot_data_dict,
                                      label_dict=label_dict)

        freq_results_file = \
            open("{}{}_ScarMapper_Combined_Frequency.txt".format(args.WorkingFolder, args.SampleName), "w")

        freq_results_file.write(freq_results_outstring)
        freq_results_file.close()

    warning = "\033[1;31m **See warnings above**\033[m" if log.warning_occurred else ''
    elapsed_time = int(time.time() - start_time)
    log.info(
        "****ScarMapper {0} complete ({1} seconds, {2} Mb peak memory).****{3}".
        format(module_name, elapsed_time, Tool_Box.peak_memory(), warning))

    # All done so we need to quit otherwise Python will not release the log file on virtual Linux.
    exit(0)
Example #24
        for info in infos.iter('bndbox'):
            x1 = float(info.find('xmin').text)
            y1 = float(info.find('ymin').text)
            x2 = float(info.find('xmax').text)
            y2 = float(info.find('ymax').text)

            idx_name = name_idx[name]
            w = (x2 - x1) / img_w
            h = (y2 - y1) / img_h
            x = (x1 / img_w) + (w / 2.)
            y = (y1 / img_h) + (h / 2.)
            txt_info = ("%g " * 5 + "\n") % (idx_name, x, y, w, h)
            result_txt.write(txt_info)


if __name__ == '__main__':
    rotated_img_path = "/home/kinsozheng/Desktop/generate_test_dataset/rotated_set/rotated_img"
    files = natsort.natsorted(os.listdir(rotated_img_path))
    names = []

    for file in files:
        name = os.path.splitext(file)[0]
        names.append(name)

    pool = Pool()

    pool.map(xml_to_txt, tqdm(names))

    pool.close()
    pool.join()
Example #25
def _initialize_config_from_meta(ts_metadata_dict, context_meta,
                                 variable_meta):
    """
    Prepares the correlation loop configuration from the uploaded metadata of selected timeseries.

    :param ts_metadata_dict: uploaded metadata
    :type ts_metadata_dict: dict
    :param context_meta: name of metadata providing the context.

    Example with Airbus datasets: "FlightIdentifier"

    :type context_meta: str
    :param variable_meta: name of the metadata providing the variable name: variables are sorted by alphanumeric order.

    Example with Airbus datasets: "metric"

    :type variable_meta: str
    :return: the computed config as multiple results:
      - corr_loop_config: list of ( <context index>, [ (<var index 1> , <tsuid 1>), ..., (<var index N> , <tsuid N>) ] )
      - contexts: ordered list of contexts:  <context> = contexts[<context_index>]
      - variables: ordered list of variables: <variable name> = variables[ <var index> ]
    :rtype: list, list, list
    :raise exception: IkatsInputContentError when an inconsistency cancels the correlations computing
    """
    ts_metadata_accepted = defaultdict(dict)
    ts_variables_accepted_set = set()
    for tsuid, meta in ts_metadata_dict.items():
        if context_meta not in meta:
            LOGGER.info("- Ignored: TS without context (meta %s): %s",
                        context_meta, tsuid)
        elif variable_meta not in meta:
            LOGGER.info("- Ignored: TS without defined variable (meta %s): %s",
                        variable_meta, tsuid)
        else:
            context_value = meta[context_meta]
            variable_name = meta[variable_meta]
            if variable_name in ts_metadata_accepted[context_value]:
                msg = "Inconsistency: context={} variable={} should match 1 TS: got at least 2 TS {} {}"
                raise IkatsInputContentError(
                    msg.format(
                        context_value, variable_name, tsuid,
                        ts_metadata_accepted[context_value][variable_name]))

            ts_metadata_accepted[context_value][variable_name] = tsuid

    def ignore_unique_ts(the_context, tsuid_by_var):
        """
        - removes context with one single ts => useless to compute correlation
        - or else completes the set ts_variables_accepted_set

        :param the_context:
        :param tsuid_by_var:
        :return:
        """

        if len(tsuid_by_var) == 1:
            LOGGER.info("- Ignored: unique TS in context %s=%s: %s",
                        context_meta, the_context,
                        list(tsuid_by_var.values())[0])
        else:
            for var in tsuid_by_var:
                ts_variables_accepted_set.add(var)
        return len(tsuid_by_var) == 1

    ts_metadata_accepted = {
        ctx: tsuid_by_var
        for ctx, tsuid_by_var in ts_metadata_accepted.items()
        if not ignore_unique_ts(ctx, tsuid_by_var)
    }

    # provides translation indexes => value on contexts
    contexts = natsorted(ts_metadata_accepted.keys())
    # provides translation indexes => value on variables
    variables = natsorted(ts_variables_accepted_set)

    # computes the corr_loop_config
    # ( <context index>, [ (<var index 1> , <tsuid 1>), ..., (<var index N> , <tsuid N>) ] )
    #
    # Note: sorted( [ (2, "TS2"), (1, "TS1"), (0, "TS0"), ] )
    #       returns [(0, 'TS0'), (1, 'TS1'), (2, 'TS2')]
    #
    corr_loop_config = [(contexts.index(ctx),
                         sorted([(variables.index(var), tsuid)
                                 for var, tsuid in tsuid_by_var.items()]))
                        for ctx, tsuid_by_var in ts_metadata_accepted.items()]

    return corr_loop_config, contexts, variables
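
Note (not from the IKATS source): a hypothetical illustration of the structures this function returns, following its docstring; every name and tsuid below is invented.

# Two contexts (e.g. flights) and two variables (e.g. metrics):
contexts = ['flight_1', 'flight_2']       # <context> = contexts[<context index>]
variables = ['altitude', 'speed']         # <variable name> = variables[<var index>]
corr_loop_config = [
    (0, [(0, 'tsuid_alt_f1'), (1, 'tsuid_spd_f1')]),  # all variables of flight_1
    (1, [(0, 'tsuid_alt_f2'), (1, 'tsuid_spd_f2')]),  # all variables of flight_2
]
# Each entry pairs a context index with its (variable index, tsuid) list, so the
# correlation loop can recover names via contexts[i] and variables[j].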
Example #26
 def connect(self):
     bin_sub_path = 'Bin\\Matrix.exe'
     library_sub_path = 'SDK\\RemoteAccess\\RemoteAccess_API.dll'
     name = os.path.basename(bin_sub_path)
     ok = False
     ps = psutil.process_iter()
     p = next(ps)
     go = True
     while go and not ok:
         try:
             p_path = p.exe()
             p_name = p.name()
         except:
             p_path = ''
             p_name = ''
         if p_name == name:
             installation_directory = p_path[:-(len(bin_sub_path) + 1)]
             library_path = os.path.join(installation_directory,
                                         library_sub_path)
             ok = os.path.exists(library_path)
         try:
             p = next(ps)
         except StopIteration:
             go = False
     ps.close()
     co = ''
     if ok:
         try:
             pe = pefile.PE(p_path)
             if pe.FILE_HEADER.Machine != self.machine:
                 self.log.AppendText('Bit architecture mismatch.\n')
                 pe.close()
                 pe = None
         except:
             pe = None
         if hasattr(pe, 'FileInfo') and pe.FileInfo:
             if isinstance(pe.FileInfo[0], list):
                 file_info = pe.FileInfo[0]
             else:
                 file_info = pe.FileInfo
             entries = [i for i in file_info if hasattr(i, 'StringTable')]
             if entries:
                 st_entries = [i for i in entries[0].StringTable]
                 if st_entries:
                     co = st_entries[0].entries[b'CompanyName'].decode()
             pe.close()
     if co:
         user_config_dir = os.environ['APPDATA']
         all_default_paths = natsort.natsorted(
             os.listdir(f"{user_config_dir}\\{co}\\MATRIX"))
         exp_sub_path = f'MATRIX\\{all_default_paths[-1]}\\Experiments'
         self.experiments_directory = os.path.join(user_config_dir, co,
                                                   exp_sub_path)
         self.lib_mate = ctypes.cdll.LoadLibrary(library_path)
         self.lib_mate.setHost(b'localhost')
         self.disconnect()
         if self.is_ran_down or self.testmode:
             rc = self.lib_mate.init(installation_directory.encode())
             self.log.AppendText('Connecting to the MATRIX, response: ' +
                                 self.rc_key(rc) + '.\n')
             if (rc == self.rcs['RMT_SUCCESS']) or self.testmode:
                 self.is_ran_down = False
                 self.experiment()
                 if not self.online:
                     self.disconnect()
                 else:
                     self.rc = self.rcs['RMT_SUCCESS']
             else:
                 self.rc = rc
     else:
         self.log.AppendText('Connecting to the MATRIX, response: '
                             '---.\n')
Example #27
        clusters[item['date']].append(item)
    return clusters.values()


target = 'site/content/calendar'
file_name_base = 'day-%d.md'

data_src = '../advent_2016_master.csv'

with open(data_src, 'r') as f:
    data = csv.DictReader(f)
    data = filter(lambda x: len(x['DISP DATE']) != 0, data)
    women = [parse_row(x) for x in data]

selection = [generate_struct(x, i) for i, x in enumerate(women)]
clusters = group_clusters(selection)

for item in clusters:
    formatted = format_cluster(item)
    path = os.path.join(target, file_name_base % int(item[0]['date']))
    with open(path, 'w') as f:
        f.write(formatted.encode('UTF-8'))

sources = ", ".join(
    natsort.natsorted(
        set(
            filter(lambda x: len(x) > 0,
                   [y.strip().decode('utf-8') for y in set(sources)]))))

# print sources
Example #28
def fiFindByWildcard(wildcard):
    return natsort.natsorted(glob.glob(wildcard, recursive=True))
Example #29
                raise ValueError('This appears to be an old version of COSMIC, please use a newer version (v76 or above).')
            try:
                chromosome, coords = tsv_data['Mutation genome position'].split(':')
            except ValueError:
                #skip cosmic entries with no position data
                continue
            else:
                start, end = coords.split('-')
                tsv_data['chromosome'] = chromosome
                tsv_data['start'] = start
                tsv_data['end'] = end

                new_cosmic_lines.append(tsv_data)

        #tabix needs file to be sorted
        new_cosmic_lines.sort(key=lambda x: int(x['end']))
        new_cosmic_lines.sort(key=lambda x: int(x['start']))
        new_cosmic_lines = natsort.natsorted(new_cosmic_lines, key=lambda x: x['chromosome'])

        headers.extend(['chromosome', 'start', 'end'])
        out = open(output_fname, 'w')
        out.write('#' + '\t'.join(headers) + '\n')
        for tsv_data in new_cosmic_lines:
            out.write('\t'.join([tsv_data[h] for h in headers]) + '\n')
        out.close()

        pysam.tabix_index(filename=output_fname, seq_col=34, start_col=35, end_col=36)

        print("Creating second index for AA position...")
        # Create a second index file by gene and start AA, endAA
        TabixIndexer.indexGeneProteinPosition("Gene name", "Mutation AA", input_fname, output_fname + ".byAA")
Example #30
File: emft.py Project: gitter-badger/EMFT
def find_latest_trmt(folder: Path or str):
    return natsorted(
        [Path(f).abspath() for f in Path(folder).listdir('TRMT_*.miz')]).pop()
Example #31
import os
import numpy
from natsort import natsort
import cv2

root_dir = "/home/tonner/Downloads/GaitDatasetB-silh/001/cl-01/054"
image_files = os.listdir(root_dir)
image_files = natsort.natsorted(image_files)
diff_img1 = []

N = len(image_files)
print(image_files)
for i in range(N):
    img = cv2.imread(os.path.join(root_dir, image_files[i]))
    if i == 0:
        diff_img = img
        diff_img1 = diff_img
    else:

        diff_img = cv2.imread(os.path.join(root_dir, image_files[i - 1]))
        # accumulate the absolute frame-to-frame differences for the AEI
        diff_img1 = diff_img1 + abs(img - diff_img)

cv2.imshow("diffrence", diff_img1)
cv2.waitKey(1)
cv2.imwrite("diffrence1.png", diff_img1)

AEI = diff_img1 / N
cv2.imwrite("diffrence2.png", AEI)
# cv2.imshow("diffrence",AEI)
# cv2.waitKey(1)
Example #32
def natural_sorted(list_string, reverse=False):
    return natsort.natsorted(list_string, reverse=reverse)
Example #33
        img.putpixel(coordinate, (128, 0, 0))

    for coordinate in fifth:
        img.putpixel(coordinate, (128, 0, 128))

    for coordinate in sixth:
        img.putpixel(coordinate, (255, 255, 255))

    for coordinate in seven:
        img.putpixel(coordinate, (0, 0, 0))

    correct_save_path = "/home/kinsozheng/Desktop/compare_voc/8bit_fix/" + img_name + ".png"
    img.save(correct_save_path)


if __name__ == '__main__':
    path = "/home/kinsozheng/Desktop/compare_voc/24bit_ori/"
    files = natsort.natsorted(os.listdir(path))

    file_names = []

    for file in files:
        name = os.path.splitext(file)[0]
        file_names.append(name)

    pool = Pool()
    pool.map(fix_eight_bit_problem, file_names)

    pool.close()
    pool.join()