Example #1
    def tqdm(*args, **kwargs):
        new_kwargs = {
                'mininterval': Tqdm.default_mininterval,
                **kwargs
        }

        return _tqdm(*args, **new_kwargs)
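A minimal standalone sketch of the same default-merging idea, assuming a Tqdm config holder and the _tqdm alias for tqdm.tqdm (the names mirror the snippet, not a particular library): because the ** unpacking comes last in the dict literal, any keyword the caller passes overrides the class-level default.

from tqdm import tqdm as _tqdm

class Tqdm:
    default_mininterval = 0.5  # assumed config value

def tqdm(*args, **kwargs):
    # caller-supplied kwargs win because ** unpacking overrides earlier keys
    new_kwargs = {'mininterval': Tqdm.default_mininterval, **kwargs}
    return _tqdm(*args, **new_kwargs)

# usage: the explicit mininterval overrides the class default
for _ in tqdm(range(3), mininterval=0.1):
    pass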
Example #2
def cluster_combine_dist(locs):
    print("Calculating distances...")

    if hasattr(locs, "z"):
        print("XYZ")
        pixelsize = int(input("Enter the pixelsize in nm/px:"))

        combined_locs = []
        for group in _tqdm(_np.unique(locs["group"])):
            temp = locs[locs["group"] == group]
            cluster = _np.unique(temp["cluster"])
            n_cluster = len(cluster)
            mean_frame = temp["mean_frame"]
            std_frame = temp["std_frame"]
            com_x = temp["x"]
            com_y = temp["y"]
            com_z = temp["z"]
            std_x = temp["lpx"]
            std_y = temp["lpy"]
            std_z = temp["lpz"]
            group_id = temp["group"]
            n = temp["n"]
            min_dist = _np.zeros(n_cluster)
            min_dist_xy = _np.zeros(n_cluster)
            for i, clusterval in enumerate(cluster):
                # find nearest neighbor in xyz
                group_locs = temp[temp["cluster"] != clusterval]
                cluster_locs = temp[temp["cluster"] == clusterval]
                ref_point = _np.array([
                    cluster_locs.x,
                    cluster_locs.y,
                    cluster_locs.z / pixelsize,
                ])
                all_points = _np.array(
                    [group_locs.x, group_locs.y, group_locs.z / pixelsize])
                distances = distance.cdist(ref_point.transpose(),
                                           all_points.transpose())
                min_dist[i] = _np.amin(distances)
                # find nearest neighbor in xy
                ref_point_xy = _np.array([cluster_locs.x, cluster_locs.y])
                all_points_xy = _np.array([group_locs.x, group_locs.y])
                distances_xy = distance.cdist(ref_point_xy.transpose(),
                                              all_points_xy.transpose())
                min_dist_xy[i] = _np.amin(distances_xy)

            clusters = _np.rec.array(
                (
                    group_id,
                    cluster,
                    mean_frame,
                    com_x,
                    com_y,
                    com_z,
                    std_frame,
                    std_x,
                    std_y,
                    std_z,
                    n,
                    min_dist,
                    min_dist_xy,
                ),
                dtype=[
                    ("group", group.dtype),
                    ("cluster", cluster.dtype),
                    ("mean_frame", "f4"),
                    ("x", "f4"),
                    ("y", "f4"),
                    ("z", "f4"),
                    ("std_frame", "f4"),
                    ("lpx", "f4"),
                    ("lpy", "f4"),
                    ("lpz", "f4"),
                    ("n", "i4"),
                    ("min_dist", "f4"),
                    ("mind_dist_xy", "f4"),
                ],
            )
            combined_locs.append(clusters)

    else:  # 2D case
        print("XY")
        combined_locs = []
        for group in _tqdm(_np.unique(locs["group"])):
            temp = locs[locs["group"] == group]
            cluster = _np.unique(temp["cluster"])
            n_cluster = len(cluster)
            mean_frame = temp["mean_frame"]
            std_frame = temp["std_frame"]
            com_x = temp["x"]
            com_y = temp["y"]
            std_x = temp["lpx"]
            std_y = temp["lpy"]
            group_id = temp["group"]
            n = temp["n"]
            min_dist = _np.zeros(n_cluster)

            for i, clusterval in enumerate(cluster):
                # find nearest neighbor in xy
                group_locs = temp[temp["cluster"] != clusterval]
                cluster_locs = temp[temp["cluster"] == clusterval]
                ref_point_xy = _np.array([cluster_locs.x, cluster_locs.y])
                all_points_xy = _np.array([group_locs.x, group_locs.y])
                distances_xy = distance.cdist(ref_point_xy.transpose(),
                                              all_points_xy.transpose())
                min_dist[i] = _np.amin(distances_xy)

            clusters = _np.rec.array(
                (
                    group_id,
                    cluster,
                    mean_frame,
                    com_x,
                    com_y,
                    std_frame,
                    std_x,
                    std_y,
                    n,
                    min_dist,
                ),
                dtype=[
                    ("group", group.dtype),
                    ("cluster", cluster.dtype),
                    ("mean_frame", "f4"),
                    ("x", "f4"),
                    ("y", "f4"),
                    ("std_frame", "f4"),
                    ("lpx", "f4"),
                    ("lpy", "f4"),
                    ("n", "i4"),
                    ("min_dist", "f4"),
                ],
            )
            combined_locs.append(clusters)

    combined_locs = stack_arrays(combined_locs, asrecarray=True, usemask=False)
    return combined_locs
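The core of the per-cluster loop above is scipy's cdist between "this cluster" and "everything else in the group" (with z rescaled from nm to pixels first). A toy sketch of that nearest-neighbour step with synthetic 2D points; the array names are illustrative only:

import numpy as np
from scipy.spatial import distance

rng = np.random.default_rng(0)
points = rng.random((20, 2))          # x, y coordinates
labels = rng.integers(0, 4, size=20)  # cluster id per point

clusters = np.unique(labels)
min_dist = np.zeros(len(clusters))
for i, c in enumerate(clusters):
    ref = points[labels == c]         # localizations in this cluster
    others = points[labels != c]      # all other clusters in the group
    # smallest pairwise distance between the cluster and the rest
    min_dist[i] = distance.cdist(ref, others).min()
print(min_dist)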
Example #3
def cluster_combine(locs):
    print("Combining localizations...")
    combined_locs = []
    if hasattr(locs[0], "z"):
        print("z-mode")
        for group in _tqdm(_np.unique(locs["group"])):
            temp = locs[locs["group"] == group]
            cluster = _np.unique(temp["cluster"])
            n_cluster = len(cluster)
            mean_frame = _np.zeros(n_cluster)
            std_frame = _np.zeros(n_cluster)
            com_x = _np.zeros(n_cluster)
            com_y = _np.zeros(n_cluster)
            com_z = _np.zeros(n_cluster)
            std_x = _np.zeros(n_cluster)
            std_y = _np.zeros(n_cluster)
            std_z = _np.zeros(n_cluster)
            group_id = _np.zeros(n_cluster)
            n = _np.zeros(n_cluster, dtype=_np.int32)
            for i, clusterval in enumerate(cluster):
                cluster_locs = temp[temp["cluster"] == clusterval]
                mean_frame[i] = _np.mean(cluster_locs.frame)
                com_x[i] = _np.average(cluster_locs.x,
                                       weights=cluster_locs.photons)
                com_y[i] = _np.average(cluster_locs.y,
                                       weights=cluster_locs.photons)
                com_z[i] = _np.average(cluster_locs.z,
                                       weights=cluster_locs.photons)
                std_frame[i] = _np.std(cluster_locs.frame)
                std_x[i] = _np.std(cluster_locs.x) / _np.sqrt(
                    len(cluster_locs))
                std_y[i] = _np.std(cluster_locs.y) / _np.sqrt(
                    len(cluster_locs))
                std_z[i] = _np.std(cluster_locs.z) / _np.sqrt(
                    len(cluster_locs))
                n[i] = len(cluster_locs)
                group_id[i] = group
            clusters = _np.rec.array(
                (
                    group_id,
                    cluster,
                    mean_frame,
                    com_x,
                    com_y,
                    com_z,
                    std_frame,
                    std_x,
                    std_y,
                    std_z,
                    n,
                ),
                dtype=[
                    ("group", group.dtype),
                    ("cluster", cluster.dtype),
                    ("mean_frame", "f4"),
                    ("x", "f4"),
                    ("y", "f4"),
                    ("z", "f4"),
                    ("std_frame", "f4"),
                    ("lpx", "f4"),
                    ("lpy", "f4"),
                    ("lpz", "f4"),
                    ("n", "i4"),
                ],
            )
            combined_locs.append(clusters)
    else:
        for group in _tqdm(_np.unique(locs["group"])):
            temp = locs[locs["group"] == group]
            cluster = _np.unique(temp["cluster"])
            n_cluster = len(cluster)
            mean_frame = _np.zeros(n_cluster)
            std_frame = _np.zeros(n_cluster)
            com_x = _np.zeros(n_cluster)
            com_y = _np.zeros(n_cluster)
            std_x = _np.zeros(n_cluster)
            std_y = _np.zeros(n_cluster)
            group_id = _np.zeros(n_cluster)
            n = _np.zeros(n_cluster, dtype=_np.int32)
            for i, clusterval in enumerate(cluster):
                cluster_locs = temp[temp["cluster"] == clusterval]
                mean_frame[i] = _np.mean(cluster_locs.frame)
                com_x[i] = _np.average(cluster_locs.x,
                                       weights=cluster_locs.photons)
                com_y[i] = _np.average(cluster_locs.y,
                                       weights=cluster_locs.photons)
                std_frame[i] = _np.std(cluster_locs.frame)
                std_x[i] = _np.std(cluster_locs.x) / _np.sqrt(
                    len(cluster_locs))
                std_y[i] = _np.std(cluster_locs.y) / _np.sqrt(
                    len(cluster_locs))
                n[i] = len(cluster_locs)
                group_id[i] = group
            clusters = _np.rec.array(
                (
                    group_id,
                    cluster,
                    mean_frame,
                    com_x,
                    com_y,
                    std_frame,
                    std_x,
                    std_y,
                    n,
                ),
                dtype=[
                    ("group", group.dtype),
                    ("cluster", cluster.dtype),
                    ("mean_frame", "f4"),
                    ("x", "f4"),
                    ("y", "f4"),
                    ("std_frame", "f4"),
                    ("lpx", "f4"),
                    ("lpy", "f4"),
                    ("n", "i4"),
                ],
            )
            combined_locs.append(clusters)

    combined_locs = stack_arrays(combined_locs, asrecarray=True, usemask=False)

    return combined_locs
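Each cluster is collapsed to a photon-weighted centre of mass plus a standard error of the mean, which is what ends up in the x/y and lpx/lpy fields. A minimal numpy sketch of that reduction on made-up values:

import numpy as np

x = np.array([1.00, 1.20, 0.90, 1.10])            # x positions in a cluster
photons = np.array([500.0, 800.0, 300.0, 600.0])  # photon counts per localization

com_x = np.average(x, weights=photons)  # photon-weighted centre of mass
std_x = np.std(x) / np.sqrt(len(x))     # standard error, as in the snippet
print(com_x, std_x)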
Example #4
    return theta


def fit_spots_parallel(spots, asynch=False):
    n_workers = max(1, int(0.75 * _multiprocessing.cpu_count()))
    n_spots = len(spots)
    n_tasks = 100 * n_workers
    spots_per_task = [
        int(n_spots / n_tasks + 1) if _ < n_spots % n_tasks
        else int(n_spots / n_tasks)
        for _ in range(n_tasks)
    ]
    start_indices = _np.cumsum([0] + spots_per_task[:-1])
    fs = []
    executor = _futures.ProcessPoolExecutor(n_workers)
    for i, n_spots_task in zip(start_indices, spots_per_task):
        fs.append(executor.submit(fit_spots, spots[i:i+n_spots_task]))
    if asynch:
        return fs
    with _tqdm(total=n_tasks, unit='task') as progress_bar:
        for f in _futures.as_completed(fs):
            progress_bar.update()
    return fits_from_futures(fs)


def fits_from_futures(futures):
    theta = [_.result() for _ in futures]
    return _np.vstack(theta)


def locs_from_fits(identifications, theta, box, em):
    # box_offset = int(box/2)
    x = theta[:, 0] + identifications.x     # - box_offset
    y = theta[:, 1] + identifications.y     # - box_offset
    lpx = _postprocess.localization_precision(theta[:, 2], theta[:, 4], theta[:, 3], em=em)
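fit_spots_parallel splits the spots into roughly 100 tasks per worker so the progress bar advances smoothly and no single future dominates. A self-contained sketch of the same split/submit/collect pattern, with a dummy worker standing in for the real fitting routine (all names here are illustrative):

import numpy as np
import multiprocessing
from concurrent import futures
from tqdm import tqdm

def square(chunk):
    # stand-in for the per-chunk work (the real code fits spots here)
    return chunk ** 2

if __name__ == '__main__':
    data = np.arange(1000)
    n_workers = max(1, int(0.75 * multiprocessing.cpu_count()))
    n_tasks = 100 * n_workers
    # spread the remainder over the first len(data) % n_tasks tasks
    base, extra = divmod(len(data), n_tasks)
    sizes = [base + 1 if i < extra else base for i in range(n_tasks)]
    starts = np.cumsum([0] + sizes[:-1])
    with futures.ProcessPoolExecutor(n_workers) as executor:
        fs = [executor.submit(square, data[s:s + n])
              for s, n in zip(starts, sizes)]
        with tqdm(total=n_tasks, unit='task') as bar:
            for _ in futures.as_completed(fs):
                bar.update()
        # futures are kept in submission order, so results concatenate in order
        result = np.concatenate([f.result() for f in fs])
    assert np.array_equal(result, data ** 2)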
Example #5
def tqdm(a, *args, **kwargs):
    return _tqdm(a, ncols=100, *args, **kwargs)  # if config.CONFIG.tqdm else a
Example #6
def initialize_database(study_file, grasp_file, commit_every=250000,
                        progress=False):
    """Create the database quickly.

    :study_file:   Tab delimited GRASP study file, available here:
                   `<github.com/MikeDacre/grasp/blob/master/grasp_studies.txt>`_
    :grasp_file:   Tab delimited GRASP file.
    :commit_every: How many rows to go through before committing to disk.
    :progress:     Display a progress bar (db length hard coded).

    """
    rows        = 0
    count       = commit_every
    pphenos     = {}
    phenos      = {}
    platforms   = {}
    populations = {}


    # Create tables
    _, engine = get_session()
    print('Dropping and creating database tables, this may take a while if',
          'the old database is large.')
    if _config['DEFAULT']['DatabaseType'] == 'sqlite':
        cfile = _config['sqlite']['DatabaseFile']
        if _os.path.isfile(cfile):
            _os.remove(cfile)
    _Base.metadata.drop_all(engine)
    _Base.metadata.create_all(engine)
    print('Tables created.')
    conn = engine.connect()

    # Get tables
    study_table = _Study.__table__
    snp_table   = _SNP.__table__
    pheno_table = _Phenotype.__table__
    pcat_table  = _PhenoCats.__table__
    plat_table  = _Platform.__table__
    pop_table   = _Population.__table__

    # Create insert statements
    study_ins   = study_table.insert()
    snp_ins     = snp_table.insert()
    pheno_ins   = pheno_table.insert()
    pcat_ins    = pcat_table.insert()
    plat_ins    = plat_table.insert()
    pop_ins     = pop_table.insert()
    phsnp_ins   = _snp_pheno_assoc.insert()
    phstudy_ins = _study_pheno_assoc.insert()
    plstudy_ins = _study_plat_assoc.insert()

    # Unique ID counters
    spare_id = 1
    pheno_id = 1
    pcat_id  = 1
    plat_id  = 1
    pop_id   = 1

    # Lists to hold records
    pheno_records   = []
    pcat_records    = []
    plat_records    = []
    pop_records     = []
    study_records   = []
    snp_records     = []
    phsnp_records   = []
    phstudy_records = []
    plstudy_records = []

    # Platform parsing regex
    plat_parser  = _recompile(r'^([^[]*)\[([^]]+)\]?(.*)')

    # Build study information from study file
    print('Parsing study information.')
    with _open_zipped(study_file) as fin:
        # Drop header
        fin.readline()

        if progress:
            pbar = _tqdm(total=2083, unit='studies')
        for line in fin:
            f = line.rstrip().split('\t')

            # Get primary phenotype
            ppheno = _cleanstr(f[7].strip())
            if ppheno not in pphenos:
                pheno_records.append({'phenotype': ppheno,
                                      'id': pheno_id})
                pphenos[ppheno] = pheno_id
                pheno_id += 1

            # Get phenotype categories
            pheno_cats = f[8].strip().split(';')
            our_phenos = []
            for pcat in pheno_cats:
                pcat = pcat.strip()
                if not pcat:
                    continue
                if pcat not in phenos:
                    pcat_records.append({
                        'id':       pcat_id,
                        'category': pcat,
                        'alias':    pheno_synonyms[pcat],
                    })
                    phenos[pcat] = pcat_id
                    pcat_id += 1
                our_phenos.append(phenos[pcat])

            # Get platform info
            our_platforms = []
            try:
                plat, snp_count, impt = [
                    i.strip() for i in plat_parser.findall(f[18].strip())[0]
                ]
                imputed = True if impt == '(imputed)' else False
                plats = _split_mesy_list(plat)
                for plat in plats:
                    plat = plat.strip()
                    if plat not in platforms:
                        plat_records.append({'id':       plat_id,
                                             'platform': plat})
                        platforms[plat] = plat_id
                        plat_id += 1
                    our_platforms.append(platforms[plat])
            except IndexError:
                plat, snp_count, impt = None, None, None
                imputed = None

            # Get population description
            try:
                pop = f[19].strip()
                try:
                    pop = pop_correction[pop]
                except KeyError:
                    pass
                if pop not in populations:
                    pop_records.append({'id':         pop_id,
                                        'population': pop})
                    populations[pop] = pop_id
                    pop_id += 1
                population = populations[pop]
            except IndexError:
                population = None

            # Set population flags
            pflag = _PopFlag
            disc_pop = pflag(0)
            rep_pop  = pflag(0)
            l = len(f)
            if l > 22 and f[22]:
                disc_pop |= pflag.eur
            if l > 23 and f[23]:
                disc_pop |= pflag.afr
            if l > 24 and f[24]:
                disc_pop |= pflag.east_asian
            if l > 25 and f[25]:
                disc_pop |= pflag.south_asian
            if l > 26 and f[26]:
                disc_pop |= pflag.his
            if l > 27 and f[27]:
                disc_pop |= pflag.native
            if l > 28 and f[28]:
                disc_pop |= pflag.micro
            if l > 29 and f[29]:
                disc_pop |= pflag.arab
            if l > 30 and f[30]:
                disc_pop |= pflag.mix
            if l > 31 and f[31]:
                disc_pop |= pflag.uns
            if l > 32 and f[32]:
                disc_pop |= pflag.filipino
            if l > 33 and f[33]:
                disc_pop |= pflag.indonesian
            if l > 35 and f[35]:
                rep_pop |= pflag.eur
            if l > 36 and f[36]:
                rep_pop |= pflag.afr
            if l > 37 and f[37]:
                rep_pop |= pflag.east_asian
            if l > 38 and f[38]:
                rep_pop |= pflag.south_asian
            if l > 39 and f[39]:
                rep_pop |= pflag.his
            if l > 40 and f[40]:
                rep_pop |= pflag.native
            if l > 41 and f[41]:
                rep_pop |= pflag.micro
            if l > 42 and f[42]:
                rep_pop |= pflag.arab
            if l > 43 and f[43]:
                rep_pop |= pflag.mix
            if l > 44 and f[44]:
                rep_pop |= pflag.uns
            if l > 45 and f[45]:
                rep_pop |= pflag.filipino
            if l > 46 and f[46]:
                rep_pop |= pflag.indonesian

            # Set the global population flag
            pop_flag = disc_pop | rep_pop

            # Create study
            study_records.append({
                'id':               int(f[0]),
                'author':           _cleanstr(f[1]),
                'pmid':             _cleanstr(f[2]),
                'grasp_ver':        1 if '1.0' in f[3] else 2,
                'noresults':        True if f[4] else False,
                'results':          int(f[5]),
                'qtl':              True if f[6] == '1' else False,
                'phenotype_id':     pphenos[ppheno],
                'phenotype_desc':   ppheno,
                'phenotype':        pphenos[ppheno],
                'phenotype_cats':   our_phenos,
                'datepub':          _get_date(f[9]),
                'in_nhgri':         _get_bool(f[10]),
                'journal':          _cleanstr(f[11]),
                'title':            _cleanstr(f[12]),
                'locations':        _cleanstr(f[13]),
                'mf':               _get_bool(f[14]),
                'mf_only':          _get_bool(f[15]),
                'sample_size':      _cleanstr(f[16]),
                'replication_size': _cleanstr(f[17]),
                'platforms':        our_platforms,
                'snp_count':        snp_count,
                'imputed':          imputed,
                'population_id':    population,
                'population':       population,
                'total':            int(f[20]),
                'total_disc':       int(f[21]),
                'pop_flag':         int(pop_flag),
                'disc_pop_flag':    int(disc_pop),
                'european':         int(f[22]) if l > 22 and f[22] else None,
                'african':          int(f[23]) if l > 23 and f[23] else None,
                'east_asian':       int(f[24]) if l > 24 and f[24] else None,
                'south_asian':      int(f[25]) if l > 25 and f[25] else None,
                'hispanic':         int(f[26]) if l > 26 and f[26] else None,
                'native':           int(f[27]) if l > 27 and f[27] else None,
                'micronesian':      int(f[28]) if l > 28 and f[28] else None,
                'arab':             int(f[29]) if l > 29 and f[29] else None,
                'mixed':            int(f[30]) if l > 30 and f[30] else None,
                'unspecified':      int(f[31]) if l > 31 and f[31] else None,
                'filipino':         int(f[32]) if l > 32 and f[32] else None,
                'indonesian':       int(f[33]) if l > 33 and f[33] else None,
                'total_rep':        int(f[34]) if l > 34 and f[34] else None,
                'rep_pop_flag':     int(rep_pop),
                'rep_european':     int(f[35]) if l > 35 and f[35] else None,
                'rep_african':      int(f[36]) if l > 36 and f[36] else None,
                'rep_east_asian':   int(f[37]) if l > 37 and f[37] else None,
                'rep_south_asian':  int(f[38]) if l > 38 and f[38] else None,
                'rep_hispanic':     int(f[39]) if l > 39 and f[39] else None,
                'rep_native':       int(f[40]) if l > 40 and f[40] else None,
                'rep_micronesian':  int(f[41]) if l > 41 and f[41] else None,
                'rep_arab':         int(f[42]) if l > 42 and f[42] else None,
                'rep_mixed':        int(f[43]) if l > 43 and f[43] else None,
                'rep_unspecified':  int(f[44]) if l > 44 and f[44] else None,
                'rep_filipino':     int(f[45]) if l > 45 and f[45] else None,
                'rep_indonesian':   int(f[46]) if l > 46 and f[46] else None,
            })

            # Create association records
            for i in our_phenos:
                phstudy_records.append({'study_id':    int(f[0]),
                                        'pheno_id':    i})
            for i in our_platforms:
                plstudy_records.append({'study_id':    int(f[0]),
                                        'platform_id': i})

            if progress:
                pbar.update()

    if progress:
        pbar.close()
    print('Writing study information...')
    conn.execute(pheno_ins, pheno_records)
    conn.execute(pcat_ins, pcat_records)
    conn.execute(plat_ins, plat_records)
    conn.execute(pop_ins, pop_records)
    conn.execute(study_ins, study_records)
    conn.execute(phstudy_ins, phstudy_records)
    conn.execute(plstudy_ins, plstudy_records)
    print('Done')

    # Reinitialize lists for main GRASP parser
    pheno_records   = []
    pcat_records    = []
    plat_records    = []
    pop_records     = []

    # Get full study info from database for use in SNPs
    sinfo = conn.execute(_select([study_table.c.id, study_table.c.pmid])).fetchall()
    studies = {}
    for i, p in sinfo:
        studies[p] = i
    no_pmid = {
        'Dissertation (https://openaccess.leidenuniv.nl/handle/1887/17746)':                                                                          1,
        'KARE Genomewide Association Study of Blood Pressure Using Imputed SNPs':                                                                     2,
        'Genome-wide Association Study Identification of a New Genetic Locus with Susceptibility to Osteoporotic Fracture in the Korean Population.': 3,
        'Genome-wide Association Study Identified TIMP2 Genetic Variant with Susceptibility to Osteoarthritis':                                       4,
        'Application of Structural Equation Models to Genome-wide Association Analysis ':                                                             5,
        'Comparison of Erythrocyte Traits Among European, Japanese and Korean':                                                                       6,
        'Genomewide Association Study Identification of a New Genetic Locus with Susceptibility to Osteoporotic Fracture in the Korean Population':   7,
        'Joint identification of multiple genetic variants of obesity in A Korean Genome-wide association study':                                     8,
        'Genome-Wide Association Analyses on Blood Pressure Using Three Different Phenotype Definitions':                                             9,
        'Association of intronic sequence variant in the gene encoding spleen tyrosine kinase with susceptibility to vascular dementia':              10,
    }

    print('Parsing SNP information...')
    with _open_zipped(grasp_file, encoding='latin1') as fin:
        # Drop header
        fin.readline()

        if progress:
            pbar = _tqdm(total=8864717, unit='snps')

        for line in fin:
            f = line.rstrip().split('\t')

            # Get primary phenotype
            ppheno = _cleanstr(f[11])
            # These are poorly curated, so there is no need to use a
            # separate table for them.
            #  if ppheno not in pphenos:
                #  conn.execute(pheno_ins.values(
                    #  phenotype=ppheno
                #  ))
                #  pphenos[ppheno] = conn.execute(
                    #  select([pheno_table.c.id]).where(
                        #  pheno_table.c.phenotype == ppheno
                    #  )
                #  ).first()[0]

            # Get phenotype categories
            pheno_cats = f[13].strip().split(';')
            our_phenos = []
            for pcat in pheno_cats:
                pcat = pcat.strip()
                if not pcat:
                    continue
                if pcat not in phenos:
                    pcat_records.append({
                        'id':       pcat_id,
                        'category': pcat,
                        'alias':    pheno_synonyms[pcat],
                    })
                    phenos[pcat] = pcat_id
                    pcat_id += 1
                our_phenos.append(phenos[pcat])

            # Get population description
            try:
                pop = f[23].strip()
                try:
                    pop = pop_correction[pop]
                except KeyError:
                    pass
                if pop not in populations:
                    pop_records.append({'id':         pop_id,
                                        'population': pop})
                    populations[pop] = pop_id
                    pop_id += 1
                population = populations[pop]
            except IndexError:
                population = None

            # Create record for SNP
            try:
                sid       = int(f[0])
            except ValueError:
                sid       = spare_id
                spare_id += 1
            l = len(f)
            try:
                study = studies[f[7].strip()]
            except KeyError:
                study = no_pmid[f[17].strip()]
            record = {
                'id':               sid,
                'NHLBIkey':         f[0],
                'HUPfield':         f[1],
                'LastCurationDate': _get_date(f[2]),
                'CreationDate':     _get_date(f[3]),
                'snpid':            f[4],
                'chrom':            f[5],
                'pos':              int(f[6]),
                'population_id':    population,
                'population':       population,
                'study_id':         study,
                'study':            study,
                'study_snpid':      f[8],
                'paper_loc':        f[9],
                'pval':             float(f[10]) if f[10] else None,
                'phenotype_desc':   ppheno,
                'phenotype_cats':   our_phenos,
            }
            record['InGene']             = f[51] if l > 52 else None
            record['NearestGene']        = f[52] if l > 53 else None
            record['InLincRNA']          = f[53] if l > 54 else None
            record['InMiRNA']            = f[54] if l > 55 else None
            record['InMiRNABS']          = f[55] if l > 56 else None
            record['dbSNPfxn']           = f[56] if l > 57 else None
            record['dbSNPMAF']           = f[57] if l > 58 else None
            record['dbSNPinfo']          = f[58] if l > 59 else None
            record['dbSNPvalidation']    = f[59] if l > 60 else None
            record['dbSNPClinStatus']    = f[60] if l > 61 else None
            record['ORegAnno']           = f[61] if l > 62 else None
            record['ConservPredTFBS']    = f[62] if l > 63 else None
            record['HumanEnhancer']      = f[63] if l > 64 else None
            record['RNAedit']            = f[64] if l > 65 else None
            record['PolyPhen2']          = f[65] if l > 66 else None
            record['SIFT']               = f[66] if l > 67 else None
            record['LSSNP']              = f[67] if l > 68 else None
            record['UniProt']            = f[68] if l > 69 else None
            record['EqtlMethMetabStudy'] = f[69] if l > 70 else None
            snp_records.append(record)

            # Create association records
            for i in our_phenos:
                phsnp_records.append({'snp_id' : sid, 'pheno_id' : i})

            # Decide when to execute
            if count:
                count -= 1
            else:
                if progress:
                    pbar.write('Writing rows...')
                else:
                    print('Writing rows...')
                if pcat_records:
                    conn.execute(pcat_ins, pcat_records)
                if plat_records:
                    conn.execute(plat_ins, plat_records)
                if pop_records:
                    conn.execute(pop_ins, pop_records)
                conn.execute(snp_ins, snp_records)
                conn.execute(phsnp_ins, phsnp_records)
                if progress:
                    pbar.write('{} rows written'.format(rows))
                else:
                    print('{} rows written'.format(rows))
                count         = commit_every-1
                pcat_records  = []
                plat_records  = []
                pop_records   = []
                snp_records   = []
                phsnp_records = []
            rows += 1
            if progress:
                pbar.update()

        # Final insert
        if progress:
            pbar.close()
        print('Writing final rows...')
        conn.execute(snp_ins, snp_records)
        conn.execute(phsnp_ins, phsnp_records)
        print('{} rows written'.format(rows))
        print('Done!')
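The loader's throughput comes from accumulating plain dicts and writing them with a single executemany-style insert every commit_every rows, rather than one INSERT per SNP. A minimal sketch of that batching pattern against an in-memory SQLite table (the table and column names below are made up; the real function also drops and recreates the full schema first):

import sqlalchemy as sa

engine = sa.create_engine('sqlite://')
meta = sa.MetaData()
snps = sa.Table('snps', meta,
                sa.Column('id', sa.Integer, primary_key=True),
                sa.Column('snpid', sa.String))
meta.create_all(engine)

commit_every = 3
records = []
with engine.begin() as conn:  # one transaction, committed on exit
    for i in range(10):
        records.append({'id': i, 'snpid': 'rs{}'.format(i)})
        if len(records) >= commit_every:
            conn.execute(snps.insert(), records)  # bulk executemany insert
            records = []
    if records:  # final partial batch
        conn.execute(snps.insert(), records)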
Example #7
def fractionalCover(
    img,
    endmembers,
    endmember_names,
    shade_normalize=False,
):
    """Computes the percent cover of each endmember spectra.

    Args:
        img: the ee.Image to unmix.
        endmembers: list of lists of ee.List objects, one list per endmember class.
        endmember_names: list of names for each endmember. must match the number of lists passed.
        shade_normalize: flag to apply shade normalization during unmixing.

    Returns:
        unmixed: a 3-band ee.Image in order of (soil-veg-impervious).
    """

    n_bands = len(list(img.bandNames().getInfo()))
    n_classes = len(endmembers)
    n_endmembers = len(endmembers[0])
    band_numbers = list(range(n_classes))
    shade = _ee.List([0] * n_bands)

    # create a list of images to append and later convert to an image collection
    unmixed = list()

    # loop through each iteration and unmix each
    for spectra in _tqdm(list(zip(*endmembers)), total=n_endmembers, desc="Unmixing"):

        if shade_normalize:
            spectra += (shade,)

        unmixed_iter = img.unmix(spectra, True, True).toFloat()

        # run the forward model to evaluate the fractional cover fit
        modeled_reflectance = computeModeledSpectra(spectra, unmixed_iter)
        rmse = computeSpectralRMSE(img, modeled_reflectance)

        # normalize by the observed shade fraction
        if shade_normalize:
            shade_fraction = unmixed_iter.select([n_classes]).subtract(1).abs()
            unmixed_iter = unmixed_iter.divide(shade_fraction)

        # rename the bands and append an rmse band
        unmixed.append(
            unmixed_iter.select(band_numbers, endmember_names).addBands(rmse)
        )

    # use the sum of rmse to weight each estimate
    rmse_sum = _ee.Image(
        _ee.ImageCollection.fromImages(unmixed)
        .select(["RMSE"])
        .sum()
        .select([0], ["SUM"])
        .toFloat()
    )
    unscaled = [computeWeight(fractions, rmse_sum) for fractions in unmixed]

    # use these weights to scale each unmixing estimate
    weight_sum = _ee.Image(
        _ee.ImageCollection.fromImages(unscaled).select(["weight"]).sum().toFloat()
    )
    scaled = [weightedAverage(fractions, weight_sum) for fractions in unscaled]

    # reduce it to a single image and return
    unmixed = _ee.ImageCollection.fromImages(scaled).sum().toFloat()

    return unmixed
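The weighting at the end (computeWeight / weightedAverage, not shown here) favours iterations whose forward-modelled spectra fit the image best. Since the Earth Engine objects can't run locally, here is a plain numpy sketch of the idea with inverse-RMSE weights; the exact weighting formula in the library may differ, so treat this as an assumption:

import numpy as np

estimates = np.array([[0.20, 0.50, 0.30],   # fractional covers, iteration 1
                      [0.30, 0.40, 0.30],   # iteration 2
                      [0.25, 0.45, 0.30]])  # iteration 3
rmse = np.array([0.05, 0.10, 0.07])         # per-iteration model error

weights = 1.0 / rmse
weights /= weights.sum()
weighted_average = weights @ estimates      # lower-error iterations count more
print(weighted_average)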
Example #8
def tqdm(*args, **kwargs):
    kwargs['ascii'] = True
    return _tqdm(*args, **kwargs)
Example #9
def main():
    import argparse as _argparse
    import json as _json
    import os as _os
    import re as _re
    import shutil as _shutil
    import hashlib as _hashlib
    import functools as _functools
    from collections import defaultdict as _defaultdict
    from datetime import datetime as _datetime
    from pathlib import Path as Path
    from tqdm import tqdm as _tqdm
    try:
        from google_photos_takeout_helper.__version__ import __version__
    except ModuleNotFoundError:
        from __version__ import __version__

    import piexif as _piexif
    from fractions import Fraction  # piexif requires some values to be stored as rationals
    import math
    if _os.name == 'nt':
        import win32_setctime as _windoza_setctime

    parser = _argparse.ArgumentParser(
        prog='Google Photos Takeout Helper',
        usage=
        'google-photos-takeout-helper -i [INPUT TAKEOUT FOLDER] -o [OUTPUT FOLDER]',
        description=
        """This script takes all of your photos from Google Photos takeout, 
        fixes their exif DateTime data (when they were taken) and file creation date,
        and then copies it all to one folder.
        """,
    )
    parser.add_argument('--version',
                        action='version',
                        version=f"%(prog)s {__version__}")
    parser.add_argument(
        '-i',
        '--input-folder',
        type=str,
        required=True,
        help='Input folder with all stuff from Google Photos takeout zip(s)')
    parser.add_argument(
        '-o',
        '--output-folder',
        type=str,
        required=False,
        default='ALL_PHOTOS',
        help='Output folder in which all photos will be placed')
    parser.add_argument(
        '--skip-extras',
        action='store_true',
        help=
        'EXPERIMENTAL: Skips extra photos, such as ones ending in "edited" or "EFFECTS".'
    )
    parser.add_argument(
        '--skip-extras-harder',  # Oh yeah, skip my extras harder daddy
        action='store_true',
        help=
        'EXPERIMENTAL: Skips extra photos such as pic(1). Also includes --skip-extras.'
    )
    parser.add_argument(
        "--divide-to-dates",
        action='store_true',
        help=
        "Create folders and subfolders based on the date the photos were taken"
    )
    parser.add_argument(
        '--albums',
        type=str,
        help=
        "EXPERIMENTAL, MAY NOT WORK FOR EVERYONE: What kind of 'albums solution' you would like:\n"
        "'json' - written in a json file\n")
    args = parser.parse_args()

    logger.info('Heeeere we go!')

    PHOTOS_DIR = Path(args.input_folder)
    FIXED_DIR = Path(args.output_folder)

    TAG_DATE_TIME_ORIGINAL = _piexif.ExifIFD.DateTimeOriginal
    TAG_DATE_TIME_DIGITIZED = _piexif.ExifIFD.DateTimeDigitized
    TAG_DATE_TIME = 306
    TAG_PREVIEW_DATE_TIME = 50971

    photo_formats = [
        '.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tif', '.tiff', '.svg',
        '.heic'
    ]
    video_formats = [
        '.mp4', '.gif', '.mov', '.webm', '.avi', '.wmv', '.rm', '.mpg', '.mpe',
        '.mpeg', '.mkv', '.m4v', '.mts', '.m2ts'
    ]
    extra_formats = [
        '-edited',
        '-effects',
        '-smile',
        '-mix',  # EN/US
        '-edytowane',  # PL
        # Add more "edited" flags in more languages if you want. They need to be lowercase.
    ]

    # Album Multimap
    album_mmap = _defaultdict(list)

    # Duplicate by full hash multimap
    files_by_full_hash = _defaultdict(list)

    # holds all the renamed files whose original names clashed in the output folder
    rename_map = dict()

    _all_jsons_dict = _defaultdict(dict)

    # Statistics:
    s_removed_duplicates_count = 0
    s_copied_files = 0
    s_cant_insert_exif_files = []  # List of files where inserting exif failed
    s_date_from_folder_files = [
    ]  # List of files where date was set from folder name
    s_skipped_extra_files = [
    ]  # List of extra files ("-edited" etc) which were skipped
    s_no_json_found = []  # List of files where we couldn't find json
    s_no_date_at_all = [
    ]  # List of files where there was absolutely no option to set correct date

    FIXED_DIR.mkdir(parents=True, exist_ok=True)

    def for_all_files_recursive(dir: Path,
                                file_function=lambda fi: True,
                                folder_function=lambda fo: True,
                                filter_fun=lambda file: True):
        for file in dir.rglob("*"):
            if file.is_dir():
                folder_function(file)
                continue
            elif file.is_file():
                if filter_fun(file):
                    file_function(file)
            else:
                logger.debug(f'Found something weird... {file}')

    def is_photo(file: Path):
        if file.suffix.lower() not in photo_formats:
            return False
        # skips the extra photo file, like edited or effects. They're kinda useless.
        nonlocal s_skipped_extra_files
        if args.skip_extras or args.skip_extras_harder:  # if the file name includes something under the extra_formats, it skips it.
            for extra in extra_formats:
                if extra in file.name.lower():
                    s_skipped_extra_files.append(str(file.resolve()))
                    return False
        if args.skip_extras_harder:
            search = r"\(\d+\)\."  # we leave the period in so it doesn't catch folders.
            if bool(_re.search(search, file.name)):
                # PICT0003(5).jpg -> PICT0003.jpg      The regex would match "(5).", and replace it with a "."
                plain_file = file.with_name(_re.sub(search, '.', str(file)))
                # if the original exists, it will ignore the (1) file, ensuring there is only one copy of each file.
                if plain_file.is_file():
                    s_skipped_extra_files.append(str(file.resolve()))
                    return False
        return True

    def is_video(file: Path):
        if file.suffix.lower() not in video_formats:
            return False
        return True

    def chunk_reader(fobj, chunk_size=1024):
        """ Generator that reads a file in chunks of bytes """
        while True:
            chunk = fobj.read(chunk_size)
            if not chunk:
                return
            yield chunk

    def get_hash(file: Path, first_chunk_only=False, hash_algo=_hashlib.sha1):
        hashobj = hash_algo()
        with open(file, "rb") as f:
            if first_chunk_only:
                hashobj.update(f.read(1024))
            else:
                for chunk in chunk_reader(f):
                    hashobj.update(chunk)
        return hashobj.digest()

    def populate_album_map(path: Path,
                           filter_fun=lambda f: (is_photo(f) or is_video(f))):
        if not path.is_dir():
            raise NotADirectoryError(
                'populate_album_map only handles directories not files')

        meta_file_exists = find_album_meta_json_file(path)
        if meta_file_exists is None or not meta_file_exists.exists():
            return False

        # means that we are processing an album so process
        for file in path.rglob("*"):
            if not (file.is_file() and filter_fun(file)):
                continue
            file_name = file.name
            # If it's not in the output folder
            if not (FIXED_DIR / file.name).is_file():
                full_hash = None
                try:
                    full_hash = get_hash(file, first_chunk_only=False)
                except Exception as e:
                    logger.debug(e)
                    logger.debug(
                        f"populate_album_map - couldn't get hash of {file}")
                if full_hash is not None and full_hash in files_by_full_hash:
                    full_hash_files = files_by_full_hash[full_hash]
                    if len(full_hash_files) != 1:
                        logger.error(
                            "full_hash_files list should only be one after duplication removal, bad state"
                        )
                        exit(-5)
                        return False
                    file_name = full_hash_files[0].name

            # check rename map in case there was an overlap namechange
            if str(file) in rename_map:
                file_name = rename_map[str(file)].name

            album_mmap[file.parent.name].append(file_name)

    # PART 3: removing duplicates

    # THIS IS PARTLY COPIED FROM STACKOVERFLOW
    # https://stackoverflow.com/questions/748675/finding-duplicate-files-and-removing-them
    #
    # We now use an optimized version linked from tfeldmann
    # https://gist.github.com/tfeldmann/fc875e6630d11f2256e746f67a09c1ae
    #
    # THANK YOU Todor Minakov (https://github.com/tminakov) and Thomas Feldmann (https://github.com/tfeldmann)
    #
    # NOTE: defaultdict(list) is a multimap, all init array handling is done internally
    # See: https://en.wikipedia.org/wiki/Multimap#Python
    #
    def find_duplicates(path: Path, filter_fun=lambda file: True):
        files_by_size = _defaultdict(list)
        files_by_small_hash = _defaultdict(list)

        for file in path.rglob("*"):
            if file.is_file() and filter_fun(file):
                try:
                    file_size = file.stat().st_size
                except (OSError, FileNotFoundError):
                    # not accessible (permissions, etc) - pass on
                    continue
                files_by_size[file_size].append(file)

        # For all files with the same file size, get their hash on the first 1024 bytes
        for file_size, files in files_by_size.items():
            if len(files) < 2:
                continue  # this file size is unique, no need to spend cpu cycles on it

            for file in files:
                try:
                    small_hash = get_hash(file, first_chunk_only=True)
                except OSError:
                    # the file might have become inaccessible since it was listed
                    continue
                files_by_small_hash[(file_size, small_hash)].append(file)

        # For all files with the hash on the first 1024 bytes, get their hash on the full
        # file - if more than one file maps to the same full hash, they are certainly duplicates
        for files in _tqdm(files_by_small_hash.values()):
            if len(files) < 2:
                # the hash of the first 1k bytes is unique -> skip this file
                continue

            for file in files:
                try:
                    full_hash = get_hash(file, first_chunk_only=False)
                except OSError:
                    # the file might have become inaccessible since it was listed
                    continue

                files_by_full_hash[full_hash].append(file)

    # Removes all duplicates in folder
    # ONLY RUN AFTER RUNNING find_duplicates()
    def remove_duplicates():
        nonlocal s_removed_duplicates_count
        # Now we have populated the final multimap of absolute dups, We now can attempt to find the original file
        # and remove all the other duplicates
        for files in _tqdm(files_by_full_hash.values()):
            if len(files) < 2:
                continue  # this hash is unique, nothing to remove

            s_removed_duplicates_count += len(files) - 1
            # TODO reconsider which dup we keep now that we're searching globally?
            # Keep the first file and delete the rest; removing items from the
            # list while iterating over it would skip entries.
            for file in files[1:]:
                file.unlink()
            del files[1:]
        return True

    # PART 1: Fixing metadata and date-related stuff

    # Returns json dict
    def find_json_for_file(file: Path):
        parenthesis_regexp = r'\([0-9]+\)'
        parenthesis = _re.findall(parenthesis_regexp, file.name)
        if len(parenthesis) == 1:
            # Fix for files named like IMG_1234(1).JPG whose json is IMG_1234.JPG(1).json
            stripped_filename = _re.sub(parenthesis_regexp, '', file.name)
            potential_json = file.with_name(stripped_filename +
                                            parenthesis[0] + '.json')
        else:
            potential_json = file.with_name(file.name + '.json')

        if potential_json.is_file():
            try:
                with open(potential_json, 'r') as f:
                    json_dict = _json.load(f)
                return json_dict
            except:
                raise FileNotFoundError(f"Couldn't find json for file: {file}")

        nonlocal _all_jsons_dict
        # Check if we need to load this folder
        if file.parent not in _all_jsons_dict:
            for json_file in file.parent.rglob("*.json"):
                try:
                    with json_file.open('r') as f:
                        json_dict = _json.load(f)
                        if "title" in json_dict:
                            # We found a JSON file with a proper title, store the file name
                            _all_jsons_dict[file.parent][
                                json_dict["title"]] = json_dict
                except:
                    logger.debug(f"Couldn't open json file {json_file}")

        # Check if we have found the JSON file among all the loaded ones in the folder
        if file.parent in _all_jsons_dict and file.name in _all_jsons_dict[
                file.parent]:
            # Great we found a valid JSON file in this folder corresponding to this file
            return _all_jsons_dict[file.parent][file.name]
        else:
            nonlocal s_no_json_found
            s_no_json_found.append(str(file.resolve()))
            raise FileNotFoundError(f"Couldn't find json for file: {file}")

    # Returns date in 2019:01:01 23:59:59 format
    def get_date_from_folder_meta(dir: Path):
        file = find_album_meta_json_file(dir)
        if not file:
            logger.debug("Couldn't pull datetime from album meta")
            return None
        try:
            with open(str(file), 'r') as fi:
                album_dict = _json.load(fi)
                # find_album_meta_json_file *should* give us "safe" file
                time = int(album_dict["albumData"]["date"]["timestamp"])
                return _datetime.fromtimestamp(time).strftime(
                    '%Y:%m:%d %H:%M:%S')
        except KeyError:
            logger.error(
                "get_date_from_folder_meta - json doesn't have required stuff "
                "- that probably means that either google f****d us again, or find_album_meta_json_file"
                "is seriously broken")

        return None

    @_functools.lru_cache(maxsize=None)
    def find_album_meta_json_file(dir: Path):
        for file in dir.rglob("*.json"):
            try:
                with open(str(file), 'r') as f:
                    dict = _json.load(f)
                    if "albumData" in dict:
                        return file
            except Exception as e:
                logger.debug(e)
                logger.debug(
                    f"find_album_meta_json_file - Error opening file: {file}")

        return None

    def set_creation_date_from_str(file: Path, str_datetime):
        try:
            # Turns out exif can have different formats - YYYY:MM:DD, YYYY/..., YYYY-... etc
            # Let's just hope we never run into American-style MM-DD-YYYY
            # The replace ': ' to ':0' fixes issues when it reads the string as 2006:11:09 10:54: 1.
            # It replaces the extra whitespace with a 0 for proper parsing
            str_datetime = (str_datetime
                            .replace('-', ':').replace('/', ':')
                            .replace('.', ':').replace('\\', ':')
                            .replace(': ', ':0'))[:19]
            timestamp = _datetime.strptime(str_datetime,
                                           '%Y:%m:%d %H:%M:%S').timestamp()
            _os.utime(file, (timestamp, timestamp))
            if _os.name == 'nt':
                _windoza_setctime.setctime(str(file), timestamp)
        except Exception as e:
            logger.debug('Error setting creation date from string:')
            logger.debug(e)
            raise ValueError(
                f"Error setting creation date from string: {str_datetime}")

    def set_creation_date_from_exif(file: Path):
        try:
            # Why do you need to be like that, Piexif...
            exif_dict = _piexif.load(str(file))
        except Exception as e:
            raise IOError("Can't read file's exif!")
        tags = [['0th', TAG_DATE_TIME], ['Exif', TAG_DATE_TIME_ORIGINAL],
                ['Exif', TAG_DATE_TIME_DIGITIZED]]
        datetime_str = ''
        date_set_success = False
        for tag in tags:
            try:
                datetime_str = exif_dict[tag[0]][tag[1]].decode('UTF-8')
                set_creation_date_from_str(file, datetime_str)
                date_set_success = True
                break
            except KeyError:
                pass  # No such tag - continue searching :/
            except ValueError:
                logger.debug("Wrong date format in exif!")
                logger.debug(datetime_str)
                logger.debug("does not match '%Y:%m:%d %H:%M:%S'")
        if not date_set_success:
            raise IOError('No correct DateTime in given exif')

    def set_file_exif_date(file: Path, creation_date):
        try:
            exif_dict = _piexif.load(str(file))
        except:  # Sorry but Piexif is too unpredictable
            exif_dict = {'0th': {}, 'Exif': {}}

        creation_date = creation_date.encode('UTF-8')
        exif_dict['0th'][TAG_DATE_TIME] = creation_date
        exif_dict['Exif'][TAG_DATE_TIME_ORIGINAL] = creation_date
        exif_dict['Exif'][TAG_DATE_TIME_DIGITIZED] = creation_date

        try:
            _piexif.insert(_piexif.dump(exif_dict), str(file))
        except Exception as e:
            logger.debug("Couldn't insert exif!")
            logger.debug(e)
            nonlocal s_cant_insert_exif_files
            s_cant_insert_exif_files.append(str(file.resolve()))

    def get_date_str_from_json(json):
        return _datetime.fromtimestamp(int(
            json['photoTakenTime']['timestamp'])).strftime('%Y:%m:%d %H:%M:%S')

    # ========= THIS IS ALL GPS STUFF =========

    def change_to_rational(number):
        """convert a number to rantional
        Keyword arguments: number
        return: tuple like (1, 2), (numerator, denominator)
        """
        f = Fraction(str(number))
        return f.numerator, f.denominator

    # got this here https://github.com/hMatoba/piexifjs/issues/1#issuecomment-260176317
    def degToDmsRational(degFloat):
        min_float = degFloat % 1 * 60
        sec_float = min_float % 1 * 60
        deg = math.floor(degFloat)
        deg_min = math.floor(min_float)
        sec = round(sec_float * 100)

        return [(deg, 1), (deg_min, 1), (sec, 100)]

    def set_file_geo_data(file: Path, json):
        """
        Reads the geoData from google and saves it to the EXIF. This works assuming that the geodata looks like -100.12093, 50.213143. Something like that.

        Written by DalenW.
        :param file:
        :param json:
        :return:
        """

        # prevents crashes
        try:
            exif_dict = _piexif.load(str(file))
        except:
            exif_dict = {'0th': {}, 'Exif': {}}

        # returns 0.0 for string inputs (treated as missing), otherwise converts to float
        def _str_to_float(num):
            if type(num) == str:
                return 0.0
            else:
                return float(num)

        # fallbacks to GeoData Exif if it wasn't set in the photos editor.
        # https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/pull/5#discussion_r531792314
        longitude = _str_to_float(json['geoData']['longitude'])
        latitude = _str_to_float(json['geoData']['latitude'])
        altitude = _str_to_float(json['geoData']['altitude'])

        # Prioritise geoData set from GPhotos editor. If it's blank, fall back to geoDataExif
        if longitude == 0 and latitude == 0:
            longitude = _str_to_float(json['geoDataExif']['longitude'])
            latitude = _str_to_float(json['geoDataExif']['latitude'])
            altitude = _str_to_float(json['geoDataExif']['altitude'])

        # latitude >= 0: North latitude -> "N"
        # latitude < 0: South latitude -> "S"
        # longitude >= 0: East longitude -> "E"
        # longitude < 0: West longitude -> "W"

        if longitude >= 0:
            longitude_ref = 'E'
        else:
            longitude_ref = 'W'
            longitude = longitude * -1

        if latitude >= 0:
            latitude_ref = 'N'
        else:
            latitude_ref = 'S'
            latitude = latitude * -1

        # referenced from https://gist.github.com/c060604/8a51f8999be12fc2be498e9ca56adc72
        gps_ifd = {_piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0)}

        # skips it if it's empty
        if latitude != 0 or longitude != 0:
            gps_ifd.update({
                _piexif.GPSIFD.GPSLatitudeRef:
                latitude_ref,
                _piexif.GPSIFD.GPSLatitude:
                degToDmsRational(latitude),
                _piexif.GPSIFD.GPSLongitudeRef:
                longitude_ref,
                _piexif.GPSIFD.GPSLongitude:
                degToDmsRational(longitude)
            })

        if altitude != 0:
            gps_ifd.update({
                _piexif.GPSIFD.GPSAltitudeRef:
                1,
                _piexif.GPSIFD.GPSAltitude:
                change_to_rational(round(altitude))
            })

        gps_exif = {"GPS": gps_ifd}
        exif_dict.update(gps_exif)

        try:
            _piexif.insert(_piexif.dump(exif_dict), str(file))
        except Exception as e:
            logger.debug("Couldn't insert geo exif!")
            # local variable 'new_value' referenced before assignment means that one of the GPS values is incorrect
            logger.debug(e)

    # ============ END OF GPS STUFF ============

    # Fixes ALL metadata, takes just file and dir and figures it out
    def fix_metadata(file: Path):
        # logger.info(file)

        has_nice_date = False
        try:
            set_creation_date_from_exif(file)
            has_nice_date = True
        except (_piexif.InvalidImageDataError, ValueError) as e:
            logger.debug(e)
            logger.debug(f'No exif for {file}')
        except IOError:
            logger.debug('No creation date found in exif!')

        try:
            google_json = find_json_for_file(file)
            date = get_date_str_from_json(google_json)
            set_file_geo_data(file, google_json)
            set_file_exif_date(file, date)
            set_creation_date_from_str(file, date)
            has_nice_date = True
            return True
        except FileNotFoundError as e:
            logger.debug(e)

        if has_nice_date:
            return True

        logger.debug(f'Last option, copying folder meta as date for {file}')
        date = get_date_from_folder_meta(file.parent)
        if date is not None:
            set_file_exif_date(file, date)
            set_creation_date_from_str(file, date)
            nonlocal s_date_from_folder_files
            s_date_from_folder_files.append(str(file.resolve()))
            return True
        else:
            logger.warning(
                f'There was literally no option to set date on {file}')
            nonlocal s_no_date_at_all
            s_no_date_at_all.append(str(file.resolve()))

        return False

    # PART 2: Copy all photos and videos to target folder

    # Makes a new name like 'photo(1).jpg'
    def new_name_if_exists(file: Path):
        new_name = file
        i = 1
        while True:
            if not new_name.is_file():
                return new_name
            else:
                new_name = file.with_name(f"{file.stem}({i}){file.suffix}")
                rename_map[str(file)] = new_name
                i += 1

    def copy_to_target(file: Path):
        if is_photo(file) or is_video(file):
            new_file = new_name_if_exists(FIXED_DIR / file.name)
            _shutil.copy2(file, new_file)
            nonlocal s_copied_files
            s_copied_files += 1
        return True

    def copy_to_target_and_divide(file: Path):
        creation_date = file.stat().st_mtime
        date = _datetime.fromtimestamp(creation_date)

        new_path = FIXED_DIR / f"{date.year}/{date.month:02}/"
        new_path.mkdir(parents=True, exist_ok=True)

        new_file = new_name_if_exists(new_path / file.name)
        _shutil.copy2(file, new_file)
        nonlocal s_copied_files
        s_copied_files += 1
        return True

    # Python lambdas can't hold multiple statements, so we bundle the progress-bar update into this helper and call both inside the lambda's arguments
    def _walk_with_tqdm(res, bar: _tqdm):
        bar.update()
        return res

    # Count *all* photo and video files - this is hacky, and we should use .rglob altogether instead of is_photo
    _input_files_count = 0
    for ext in photo_formats + video_formats:
        _input_files_count += len(list(PHOTOS_DIR.rglob(f'*{ext}')))
    logger.info(f'Input files: {_input_files_count}')

    logger.info('=====================')
    logger.info('Fixing files metadata and creation dates...')
    # tqdm progress bar stuff
    _metadata_bar = _tqdm(total=_input_files_count)

    for_all_files_recursive(
        dir=PHOTOS_DIR,
        file_function=lambda f: _walk_with_tqdm(fix_metadata(f), _metadata_bar),
        # TODO (probably never, but should): Change this maybe to path.rglob
        filter_fun=lambda f: (is_photo(f) or is_video(f)))
    _metadata_bar.close()
    logger.info('=====================')

    logger.info('=====================')
    _copy_bar = _tqdm(total=_input_files_count)
    if args.divide_to_dates:
        logger.info('Creating subfolders and dividing files based on date...')
        for_all_files_recursive(
            dir=PHOTOS_DIR,
            file_function=lambda f: _walk_with_tqdm(
                copy_to_target_and_divide(f), _copy_bar),
            filter_fun=lambda f: (is_photo(f) or is_video(f)))
    else:
        logger.info('Copying all files to one folder...')
        logger.info(
            '(If you want, you can get them organized in folders based on year and month.'
            ' Run with --divide-to-dates to do this)')
        for_all_files_recursive(
            dir=PHOTOS_DIR,
            file_function=lambda f: _walk_with_tqdm(
                copy_to_target(f), _copy_bar),
            filter_fun=lambda f: (is_photo(f) or is_video(f)))
    _copy_bar.close()
    logger.info('=====================')
    logger.info('=====================')
    logger.info('Finding duplicates...')
    find_duplicates(FIXED_DIR, lambda f: (is_photo(f) or is_video(f)))
    logger.info('Removing duplicates...')
    remove_duplicates()
    logger.info('=====================')
    if args.albums is not None:
        if args.albums.lower() == 'json':
            logger.info('=====================')
            logger.info('Populate json file with albums...')
            logger.info('=====================')
            for_all_files_recursive(dir=PHOTOS_DIR,
                                    folder_function=populate_album_map)
            file = PHOTOS_DIR / 'albums.json'
            with open(file, 'w', encoding="utf-8") as outfile:
                _json.dump(album_mmap, outfile)
            logger.info(str(file))

    logger.info('')
    logger.info('DONE! FREEEEEDOOOOM!!!')
    logger.info('')
    logger.info("Final statistics:")
    logger.info(f"Files copied to target folder: {s_copied_files}")
    logger.info(f"Removed duplicates: {s_removed_duplicates_count}")
    logger.info(
        f"Files for which we couldn't find json: {len(s_no_json_found)}")
    if len(s_no_json_found) > 0:
        with open(PHOTOS_DIR / 'no_json_found.txt', 'w',
                  encoding="utf-8") as f:
            f.write(
                "# This file contains a list of files for which no corresponding .json file was found\n"
            )
            f.write(
                "# You might find it useful, but you can safely delete this :)\n"
            )
            f.write("\n".join(s_no_json_found))
            logger.info(f" - you have full list in {f.name}")
    logger.info(
        f"Files where inserting new exif failed: {len(s_cant_insert_exif_files)}"
    )
    if len(s_cant_insert_exif_files) > 0:
        logger.info(
            "(This is not necessarily a bad thing - pretty much all videos fail, "
            "and your photos probably have their original exif already)")
        with open(PHOTOS_DIR / 'failed_inserting_exif.txt',
                  'w',
                  encoding="utf-8") as f:
            f.write(
                "# This file contains a list of files where setting the right exif date failed\n"
            )
            f.write(
                "# You might find it useful, but you can safely delete this :)\n"
            )
            f.write("\n".join(s_cant_insert_exif_files))
            logger.info(f" - you have full list in {f.name}")
    logger.info(
        f"Files where the date was set from the name of the folder: {len(s_date_from_folder_files)}"
    )
    if len(s_date_from_folder_files) > 0:
        with open(PHOTOS_DIR / 'date_from_folder_name.txt', 'w',
                  encoding="utf-8") as f:
            f.write(
                "# This file contains a list of files where the date was set from the name of the folder\n"
            )
            f.write(
                "# You might find it useful, but you can safely delete this :)\n"
            )
            f.write("\n".join(s_date_from_folder_files))
            logger.info(f" - you have full list in {f.name}")
    if args.skip_extras or args.skip_extras_harder:
        # Remove duplicates: https://www.w3schools.com/python/python_howto_remove_duplicates.asp
        s_skipped_extra_files = list(dict.fromkeys(s_skipped_extra_files))
        logger.info(
            f"Extra files that were skipped: {len(s_skipped_extra_files)}")
        with open(PHOTOS_DIR / 'skipped_extra_files.txt',
                  'w',
                  encoding="utf-8") as f:
            f.write(
                "# This file contains a list of extra files (ending with '-edited' etc) which were skipped because "
                "you've used either --skip-extras or --skip-extras-harder\n")
            f.write(
                "# You might find it useful, but you can safely delete this :)\n"
            )
            f.write("\n".join(s_skipped_extra_files))
            logger.info(f" - you have full list in {f.name}")
    if len(s_no_date_at_all) > 0:
        logger.info('')
        logger.info(
            f"!!! There were {len(s_no_date_at_all)} files where there was absolutely no way to set "
            f"a correct date! They will probably sort before all the others, as their 'last modified' "
            f"value is set to the moment you downloaded your takeout :/")
        with open(PHOTOS_DIR / 'unsorted.txt', 'w', encoding="utf-8") as f:
            f.write(
                "# This file contains a list of files where there was no way to set a correct date!\n"
            )
            f.write(
                "# You probably want to set their dates manually - but you can delete this if you want\n"
            )
            f.write("\n".join(s_no_date_at_all))
            logger.info(f" - you have full list in {f.name}")

    logger.info('')
    logger.info(
        'Sooo... what now? See README.md for the nice G Photos alternatives I found and recommend'
    )
    logger.info('')
    logger.info(
        'If I helped you, consider donating: https://www.paypal.me/TheLastGimbus'
    )
    logger.info('Have a nice day!')
Example #10
0
def tqdm(*args, **kwargs):
    return _tqdm(*args, **kwargs,
                 mininterval=1)  # Safety, do not overflow buffer
Example #11
0
def tqdm(a):
    return _tqdm(a) if Config().tqdm else a
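Both wrappers above follow the same pattern: hide the real tqdm behind a thin same-named function that either throttles refreshes or disables the bar entirely. A hedged usage sketch, assuming the mininterval=1 variant from Example #10 (the loop body is arbitrary):

# With mininterval=1 the bar redraws at most once per second, so even a
# very fast loop won't flood the terminal buffer.
for _ in tqdm(range(1_000_000)):
    pass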
Example #12
0
def convert_graph(context, graph, outputs=None):
    """
    Construct Core ML ops corresponding to `graph`.

    Inputs:

    - context (TranscriptContext)

    - graph (dict of str -> ParsedTFNode): op name --> ParsedTFNode

    - outputs (list[str]): List of output names. If outputs is None, the last
      node of the graph (after topsort) must have op type 'return'.

    Returns:

    list[Var]: the output Vars of the constructed Block.
    """
    connect_global_initializer(graph)
    nodes = topsort(graph)

    if outputs is None:
        # infer outputs from return
        last_node = graph[nodes[-1]]
        if last_node.op != "return":
            msg = "Expect the last node in graph to be 'return'; Got {}"
            raise ValueError(msg.format(last_node.op))
        second_last_node = graph[last_node.inputs[0]]
        if second_last_node.op == "make_tuple":
            outputs = second_last_node.inputs
        else:
            # single output function
            outputs = second_last_node.name

    # Translate the non-placeholder ops.
    num_nodes = len(nodes)
    for i, node_name in enumerate(
        _tqdm(nodes, desc="Converting Frontend ==> MIL Ops", unit=" ops")
    ):
        node = graph[node_name]
        if node.op == "return":
            continue
        logging.info(
            "[{}/{}] Converting {} op '{}'".format(i + 1, num_nodes, node.op, node.name)
        )

        if node.op == "NoOp":
            continue
        _add_op = _TF_OPS_REGISTRY.get(node.op, None)
        if _add_op is None:
            msg = "Conversion for TF op '{0}' not implemented.\n \n{1}".format(
                node.op, node.original_node
            )
            raise NotImplementedError(msg)
        _add_op(context, node)

        if len(node.outputs) > 0:
            # set_global / get_global / NoOp has no direct consumer / outputs
            x = context[node.name]
            check_output_shapes(x, node)

    output_is_list = isinstance(outputs, (tuple, list))
    if not output_is_list:
        outputs = [outputs]

    output_vars = []
    for output in outputs:
        x = context[output.split(":")[0]]
        if isinstance(x, (tuple, list)):
            idx = int(output.split(":")[1])
            output_vars.append(x[idx])
        else:
            output_vars.append(x)

    return output_vars if output_is_list else output_vars[0]
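The loop above dispatches through _TF_OPS_REGISTRY, a mapping from TF op names to conversion functions. A minimal sketch of that registry pattern (illustrative names only, not coremltools' actual API):

# Hypothetical registry: op name -> handler taking (context, node).
_OPS_REGISTRY_SKETCH = {}

def _register_op_sketch(op_name):
    """Record a conversion function for one op type."""
    def decorator(func):
        _OPS_REGISTRY_SKETCH[op_name] = func
        return func
    return decorator

@_register_op_sketch("Relu")
def _convert_relu_sketch(context, node):
    # A real handler would build the backend op here and store its output
    # under node.name so that later ops can look it up in the context.
    pass

# Lookup mirrors the convert_graph loop: a missing entry means the op is
# unsupported and should raise NotImplementedError.
handler = _OPS_REGISTRY_SKETCH.get("Relu")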
Example #13
0
def _csv2hdf(path, pixelsize):
    from glob import glob
    from tqdm import tqdm as _tqdm
    paths = glob(path)
    if paths:
        from .io import save_locs
        import os.path
        import numpy as _np
        for path in _tqdm(paths):
            print('Converting {}'.format(path))

            data = _np.genfromtxt(path, dtype=float, delimiter=',', names=True)

            try:
                frames = data['frame'].astype(int)
                # make sure frames start at zero:
                frames = frames - _np.min(frames)
                x = data['x_nm']/pixelsize
                y = data['y_nm']/pixelsize
                photons = data['intensity_photon'].astype(int)

                bg = data['offset_photon'].astype(int)
                lpx = data['uncertainty_xy_nm']/pixelsize
                lpy = data['uncertainty_xy_nm']/pixelsize

                if 'z_nm' in data.dtype.names:
                    z = data['z_nm']/pixelsize
                    sx = data['sigma1_nm']/pixelsize
                    sy = data['sigma2_nm']/pixelsize

                    LOCS_DTYPE = [('frame', 'u4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
                      ('photons', 'f4'), ('sx', 'f4'), ('sy', 'f4'),
                      ('bg', 'f4'), ('lpx', 'f4'), ('lpy', 'f4')]

                    locs = _np.rec.array((frames, x, y, z, photons, sx, sy, bg, lpx, lpy),
                             dtype=LOCS_DTYPE)

                else:
                    sx = data['sigma_nm']/pixelsize
                    sy = data['sigma_nm']/pixelsize

                    LOCS_DTYPE = [('frame', 'u4'), ('x', 'f4'), ('y', 'f4'),
                      ('photons', 'f4'), ('sx', 'f4'), ('sy', 'f4'),
                      ('bg', 'f4'), ('lpx', 'f4'), ('lpy', 'f4')]

                    locs = _np.rec.array((frames, x, y, photons, sx, sy, bg, lpx, lpy),
                             dtype=LOCS_DTYPE)

                locs.sort(kind='mergesort', order='frame')

                img_info = {}
                img_info['Generated by'] = 'Picasso csv2hdf'
                img_info['Frames'] = int(_np.max(frames))+1
                img_info['Height'] = int(_np.ceil(_np.max(y)))
                img_info['Width'] = int(_np.ceil(_np.max(x)))

                info = []
                info.append(img_info)

                base, ext = os.path.splitext(path)
                out_path = base + '_locs.hdf5'
                save_locs(out_path, locs, info)
                print('Saved to {}.'.format(out_path))
            except Exception as e:
                print(e)
                print('Error. Datatype not understood.')
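A hedged usage sketch for the converter above (the glob pattern and the 130 nm/px value are made-up; the CSVs must use the column names read in the snippet, e.g. x_nm, y_nm, intensity_photon):

# Convert every matching CSV into a Picasso-style *_locs.hdf5 file,
# assuming a camera pixel size of 130 nm/px.
_csv2hdf('*.csv', pixelsize=130)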
Example #14
0
    def tqdm(*args, **kwargs):
        new_kwargs = {"mininterval": Tqdm.default_mininterval, **kwargs}

        return _tqdm(*args, **new_kwargs)
Example #15
0
def tqdm_pbar(**kwargs):
    for k, v in get_tqdm_defaults().items():
        kwargs.setdefault(k, v)
    return _tqdm(**kwargs)
Example #16
0
def align_energy_vertical(signal,
                          start=None,
                          end=None,
                          column=0,
                          smoothing_parameter=0.05,
                          number_of_iterations=1,
                          print_output=True,
                          plot_deriv=False,
                          plot_shifts=False):
    """
    Align the energy (signal) axis of a spectrum image, based on the
    spectrum in the first column of each row. This is useful for SI of
    vertically aligned interfaces.

    If the spectrum image is uniform in the left-most column, this method
    should help to reduce the effect of energy drift that occurs during
    acquisition (if you cannot acquire the zero-loss at the same time). It will
    not correct for any drift that occurs within each row, but that is a
    much trickier problem, considering the SI likely covers different phases.

    A note on the method: the first column is extracted as a line scan. This
    data is smoothed using a Lowess filter (to reduce the impact of noise), and
    then the signal-dimension derivative is taken. This signal is passed to
    hyperspy.signal.Signal1DTools.estimate_shift1D in order to figure out
    what shift is necessary to keep the energy axis aligned. Each column
    of the original spectrum image is shifted by this same amount, and then
    the overall spectrum image is cropped in the signal dimension so there
    are no blank pixels. If the results are not quite as expected,
    try increasing the smoothing parameter, as noise in the derivative is
    the most likely reason for failure.

    Parameters
    ----------
    signal: ~hyperspy.signal.Signal
        2D spectrum image to align in energy dimension
    start: {int | float | None}
        The limits of the interval in which to align. If int they are taken
        as the axis index. If float they are taken as the axis value.
    end: {int | float | None}
        The limits of the interval in which to align. If int they are taken
        as the axis index. If float they are taken as the axis value.
    column: int
        The column of data to use for shifting. By default, the left-most (
        0) is used, but if there is no edge in this area, a different
        column should be used.
    smoothing_parameter: float
        Degree of smoothing used to smooth the original spectral data
        (necessary before taking the derivative). This parameter is passed to
        :py:meth:`~hyperspy.signal.Signal1DTools.smooth_lowess`
    number_of_iterations: int
        Number of Lowess iterations used to smooth the original spectral data
        (necessary before taking the derivative). This parameter is passed to
        :py:meth:`~hyperspy.signal.Signal1DTools.smooth_lowess`
    print_output: bool
        Whether or not to show output during calculation.
    plot_deriv: bool
        Whether or not to plot the derivative output. Useful if results are
        not as expected, and can show if more smoothing is needed
    plot_shifts: bool
        Whether or not to show a plot illustrating the shifts that were found

    Returns
    -------
    aligned_signal: ~hyperspy.signal.Signal
        2D spectrum image with signal axes aligned and cropped
    """
    s = signal.inav[column, :]
    if print_output:
        print("Smoothing column {}...".format(column))
    s.smooth_lowess(smoothing_parameter=smoothing_parameter,
                    number_of_iterations=number_of_iterations,
                    show_progressbar=True)
    sd = s.diff(-1)
    if plot_deriv:
        sd.plot()

    shifts = sd.estimate_shift1D(start=start, end=end)

    if print_output:
        print('Shifts are:')
        print(shifts)
        print('Max shift: {}'.format(_np.nanmax(shifts)))

    if plot_deriv:
        sdd = sd.deepcopy()
        sdd.shift1D(shifts, crop=False, show_progressbar=False)
        sdd.plot()

    if plot_shifts:
        u = s.axes_manager[-1].units
        med = _np.median(shifts)
        _plt.figure()
        _plt.scatter(range(len(shifts)), shifts)
        _plt.axhline(med, ls='--', c='k')
        _plt.text(0.5, med + 0.05, 'Median = {0:.2f} {1:s}'.format(med, u))
        ax = _plt.gca()
        ax.set_ylabel('Shift value({:s})'.format(u))
        ax.set_xlabel('Row #')
        _plt.xlim(0, len(shifts))
        print(("Median shift is {:.2f}".format(_np.median(shifts))))
        print(("Mean shift is {:.2f}".format(_np.mean(shifts))))

    aligned_signal = signal.deepcopy()

    for i in _tqdm(range(signal.axes_manager['x'].size),
                   desc='Aligning spectrum image'):
        s = signal.inav[i, :]
        s.shift1D(shifts, crop=False, show_progressbar=False)
        aligned_signal.inav[i, :] = s

    # ## This code lifted from HyperSpy's shift1D method:
    # Figure out min/max shifts, and translate to shifts in index as well
    minimum, maximum = _np.nanmin(shifts), _np.nanmax(shifts)
    axis = aligned_signal.axes_manager.signal_axes[0]
    if minimum < 0:
        ihigh = 1 + axis.value2index(axis.high_value + minimum,
                                     rounding=_math.floor)
    else:
        ihigh = axis.high_index + 1
    if maximum > 0:
        ilow = axis.value2index(axis.offset + maximum, rounding=_math.ceil)
    else:
        ilow = axis.low_index

    aligned_signal.crop(axis.index_in_axes_manager, ilow, ihigh)

    return aligned_signal
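A possible call, assuming the energy axis is calibrated so that float limits are taken as axis values (per the docstring); si and the 400-500 eV window are made-up:

# Align the spectrum image si using its left-most column, estimating shifts
# from a 400-500 eV window; raise smoothing_parameter if the derivative is
# too noisy for estimate_shift1D.
aligned = align_energy_vertical(si, start=400.0, end=500.0,
                                smoothing_parameter=0.1)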
Example #17
0
def dendrogram(hier, line=None, layout=None, show_progress=False, **kwargs):
    """
    Generates a dendrogram of a hierarchical clustering scheme in a Plotly
    Figure. Uses Plotly Shapes to draw the dendrogram and a scatter plot to
    highlight clusters at their branching points.

    Arguments
    ---------
    hier :          A Hierarchy which is to be plotted as a Dendrogram.

    Keyword Arguments
    -----------------
    line :          A dict for formatting Plotly shape lines.
                    If an attribute is given as a single string or float, it will be applied to all lines.
                    If it is given as an array of length hier.clusters.size, it will be applied separately to the lines immediately beneath each cluster.

    layout :        A dictionary for updating values for the Plotly Figure layout.

    show_progress : Boolean; whether to show a tqdm progress bar as the dendrogram is generated.

    **kwargs :      Keyword arguments for the Plotly Scatter trace.
                    If an attribute is given as a single string or float, it will be applied to all branch points.
                    If it is given as an array of length hier.clusters.size, it will be applied separately to each cluster's branch point.

    Output
    ------
    fig :           A Plotly Figure containing the dendrogram.
    """
    groups = hier.cluster_groups()

    x_items = _np.zeros([hier.items.size])
    s_max = _np.max(hier._scales)
    top_agg = hier.at_scale(s_max)
    x_base = 0
    x_in_superset = []
    for c in range(top_agg.clusters.size):
        grp = top_agg._aggregations[c]
        n = len(grp)
        x_items[grp] = _np.arange(n) + x_base
        x_base += n
        x_in_superset = x_in_superset + list(top_agg._aggregations[c])
    x_in_superset = _np.array(x_in_superset)

    x_clusters = _np.zeros([hier.clusters.size])
    y_clusters = _np.zeros([hier.clusters.size])
    fig = _go.Figure()

    lineinfo = [{} for c in range(hier.clusters.size)]
    if line is None:
        for c in range(hier.clusters.size):
            lineinfo[c] = dict(color="RoyalBlue", width=3)
    else:
        for k, v in line.items():
            if hasattr(v, '__len__') and not (isinstance(v, str)):
                for c in range(hier.clusters.size):
                    lineinfo[c][k] = v[c]
            else:
                for c in range(hier.clusters.size):
                    lineinfo[c][k] = v
    if show_progress:
        clust_iter = _tqdm(range(hier.clusters.size))
    else:
        clust_iter = range(hier.clusters.size)

    for c in clust_iter:
        x_clusters[c] = _np.average(
            x_items[groups[hier.clusters[c]].in_superset])
        y_clusters[c] = hier._scales[c]
        if len(hier._children[c]) > 0:
            xmin = _np.min(x_clusters[hier._children[c]])
            xmax = _np.max(x_clusters[hier._children[c]])
            fig.add_shape(
                # Line Horizontal
                dict(type="line",
                     x0=xmin,
                     y0=y_clusters[c],
                     x1=xmax,
                     y1=y_clusters[c],
                     line=lineinfo[c]))
            for k in hier._children[c]:
                fig.add_shape(
                    # Line Vertical
                    dict(type="line",
                         x0=x_clusters[k],
                         y0=y_clusters[k],
                         x1=x_clusters[k],
                         y1=y_clusters[c],
                         line=lineinfo[c]))

    # Fall back to cluster IDs / a default template, popping any user-supplied
    # values out of kwargs so Scatter doesn't receive duplicate keywords.
    customdata = kwargs.pop('customdata', hier.clusters.elements)
    hovertemplate = kwargs.pop(
        'hovertemplate', '<b>ID</b>: %{customdata} <br><b>Scale</b>: %{y} ')
    fig.add_trace(
        _go.Scatter(x=x_clusters,
                    y=y_clusters,
                    mode='markers',
                    customdata=customdata,
                    hovertemplate=hovertemplate,
                    **kwargs))
    fig.update_layout(title=kwargs.get('title', 'Dendrogram'),
                      margin=dict(l=20, r=20, t=30, b=10),
                      xaxis_title=kwargs.get('x_axis_label', 'Items'),
                      yaxis_title=kwargs.get('y_axis_label', 'Scale'),
                      xaxis=dict(tickmode='array',
                                 tickvals=_np.arange(hier.items.size),
                                 ticktext=hier.items.elements[x_in_superset]))
    fig.update_shapes(layer='below')
    fig.update_xaxes(showgrid=False, zeroline=False)
    fig.update_yaxes(showgrid=False, zeroline=False)
    if layout is not None:
        fig.update_layout(layout)
    return fig
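A hedged usage sketch (hier stands for whatever Hierarchy object the snippet expects; the styling values are arbitrary):

# Draw the dendrogram with thin grey branch lines and a progress bar,
# then open it in the default Plotly renderer.
fig = dendrogram(hier, line=dict(color='grey', width=1), show_progress=True)
fig.show()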
Example #19
0
    def tqdm(*args, **kwargs):
        new_kwargs = {}
        new_kwargs['mininterval'] = Tqdm.default_mininterval
        new_kwargs.update(kwargs)

        return _tqdm(*args, **new_kwargs)