def main():
    dataset = 'CB1'
    neuropil = 'fake'
    filenames = util.get_filenames(dataset, neuropil)
    calculate_distance.save_distances(filenames['h5'], filenames['distance_npy'])
    D = np.load(filenames['distance_npy'])
    k = 3
    clusters, curr_medoids = kMedoids.cluster(D, k=k)
    cluster_type = 'K%02d_dicedist' % (k,)
    filenames = util.get_filenames(dataset, neuropil, cluster_type=cluster_type)
    nrrd_fname = filenames['clustering_result_nrrd']
    save_to_nrrd(filenames['h5'], clusters, nrrd_fname)
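Note: every example on this page revolves around a project-specific get_filenames helper, and its signature varies from project to project: a dataset/neuropil pair above, a directory plus file extension, a file-list CSV, or no arguments at all elsewhere. Purely as a point of reference, a minimal directory-scanning variant might look like the hypothetical sketch below; it is not the implementation used by any of these projects.

# Hypothetical sketch of a directory-scanning get_filenames helper.
# Each project on this page defines its own version with a different signature.
import os

def get_filenames(directory, extension=None):
    """Return sorted file names in `directory`, optionally filtered by extension."""
    names = sorted(os.listdir(directory))
    if extension is not None:
        names = [name for name in names if name.endswith(extension)]
    return names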
Example #2
def main():
    # get command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--probability", type=float, default=TRAIN_PROP)
    args = parser.parse_args()

    # parse the file and sample
    files = get_filenames()
    data_parser = parse(file(files.dataset))
    training_set, testing_set = sample(data_parser, args.probability)

    # write to file as json
    with file(files.train, 'w') as train_file:
        json.dump(training_set, train_file)
        print 'Dumped training set to %s,' % files.train,
        print 'size', len(training_set)
    with file(files.test, 'w') as test_file:
        json.dump(testing_set, test_file)
        print 'Dumped test set to %s,' % files.test,
        print 'size', len(testing_set)

    # check the file is written correctly
    with file(files.train, 'r') as train_file:
        assert json.load(train_file) == training_set
        print 'Training file check OK.'
    with file(files.test, 'r') as test_file:
        assert json.load(test_file) == testing_set
        print 'Test file check OK.'
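This example targets Python 2 (the file() builtin and print statements). For readers on Python 3, a rough equivalent of the write-and-verify step, assuming the same get_filenames(), parse() and sample() helpers, might look like this sketch:

# Hypothetical Python 3 port of the JSON write/verify step above.
import json

def dump_and_check(path, dataset, label):
    with open(path, 'w') as fp:
        json.dump(dataset, fp)
    print('Dumped {} set to {}, size {}'.format(label, path, len(dataset)))
    with open(path) as fp:
        assert json.load(fp) == dataset
    print('{} file check OK.'.format(label.capitalize()))

# dump_and_check(files.train, training_set, 'training')
# dump_and_check(files.test, testing_set, 'test')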
Example #3
def _fill_missing_args(args):
    (width, height) = determine_shape(args, args.projections, store=False)
    args.center_position_x = (args.center_position_x or [width / 2.])
    args.center_position_z = (args.center_position_z or [height / 2.])

    if not args.overall_angle:
        args.overall_angle = 360.
        LOG.info('Overall angle not specified, using 360 deg')

    if not args.number:
        if len(args.axis_angle_z) > 1:
            LOG.debug("--number not specified, using length of --axis-angle-z: %d",
                      len(args.axis_angle_z))
            args.number = len(args.axis_angle_z)
        else:
            num_files = len(get_filenames(args.projections))
            if not num_files:
                raise RuntimeError("No files found in `{}'".format(args.projections))
            LOG.debug("--number not specified, using number of files matching "
                      "--projections pattern: %d", num_files)
            args.number = num_files

    if args.dry_run:
        if not args.number:
            raise ValueError('--number must be specified with --dry-run')
        determine_shape(args, args.projections, store=True)
        LOG.info('Dummy data W x H x N: {} x {} x {}'.format(args.width,
                                                             args.height,
                                                             args.number))

    return args
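For context, a minimal sketch of how a defaults-filling helper like this is typically driven; the option names below mirror the snippet above, while determine_shape, get_filenames and the logger are assumed to come from the surrounding project:

# Hypothetical driver for _fill_missing_args; option names follow the snippet above.
import argparse

def build_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('projections', help='projection file pattern')
    parser.add_argument('--number', type=int)
    parser.add_argument('--overall-angle', type=float)
    parser.add_argument('--center-position-x', type=float, nargs='*')
    parser.add_argument('--center-position-z', type=float, nargs='*')
    parser.add_argument('--axis-angle-z', type=float, nargs='*', default=[0.0])
    parser.add_argument('--dry-run', action='store_true')
    return parser

# args = _fill_missing_args(build_parser().parse_args())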
Example #4
def main():
    dataset_functions = {
        'bp': process_bp,
        'gn1': process_gn1,
        'other': process_other
    }
    ap = argparse.ArgumentParser()
    ap.add_argument('INPUT', help='Input directory (html files)')
    ap.add_argument('OUTPUT', help='Output directory')
    ap.add_argument('DATASET',
                    choices=dataset_functions.keys(),
                    help='Dataset type')
    ap.add_argument('--prefix', help='Add a prefix to the file names.')
    args = ap.parse_args()
    os.makedirs(args.OUTPUT, exist_ok=True)

    for f in tqdm(util.get_filenames(args.INPUT, '.html')):
        try:
            with open(f, 'rb') as hfile:
                doc = BeautifulSoup(hfile, features='html5lib')
            # for some reason, parsing malformed HTML twice works better
            doc2 = BeautifulSoup(doc.prettify(), features='html5lib')
            process(doc2, dataset_functions[args.DATASET])
            f_name = os.path.basename(f)
            if args.prefix:
                f_name = args.prefix + f_name
            with open(os.path.join(args.OUTPUT, f_name), 'w',
                      encoding='utf-8') as hfile:
                hfile.write(doc2.prettify())
        except Exception:
            tqdm.write('error processing {}'.format(f))
Example #7
def main():
    # src_path = sys.argv[1]
    # print('src_path ', src_path)
    # filenames = get_filenames(src_path)
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        "--input",
                        type=int,
                        choices=range(100),
                        help="index for single input image")
    parser.add_argument("-t",
                        "--tmin",
                        type=float,
                        default=0.2,
                        help="minimum transmission rate")
    parser.add_argument("-A",
                        "--Amax",
                        type=int,
                        default=220,
                        help="maximum atmosphere light")
    parser.add_argument("-w",
                        "--window",
                        type=int,
                        default=15,
                        help="window size of dark channel")
    parser.add_argument("-r",
                        "--radius",
                        type=int,
                        default=40,
                        help="radius of guided filter")
    parser.add_argument("-p",
                        "--path",
                        type=str,
                        default=1,
                        help="path of dataset")
    args = parser.parse_args()
    src_path = 'img/' + str(args.path)
    print('src_path ', src_path)
    filenames = get_filenames(src_path)
    '''if args.input is not None: 
        src, dest = filenames[args.input]
        dest = dest.replace("%s",
                     "%s-%d-%d-%d-%d" % ("%s", args.tmin * 100, args.Amax,
                                           args.window, args.radius))
        generate_results(src, dest, partial(dehaze, tmin=args.tmin, Amax=args.Amax,
                                           w=args.window, r=args.radius))
    else:'''
    if len(src_path) > 0:  # dummy case to make default indentation work
        '''for idx in SP_IDX:
            src, dest = filenames[idx]
            for param in SP_PARAMS:
                newdest = dest.replace("%s",
                     "%s-%d-%d-%d-%d" % ("%s", param['tmin'] * 100,
                                         param['Amax'], param['w'],
                                         param['r']))
                generate_results(src, newdest, partial(dehaze, **param))'''

        for src, dest in filenames:
            generate_results(src, dest, dehaze)
Example #8
def create_data():
    names_set = find_names()

    site = {}

    for file in glob.glob("../data/documents/*.txt"):
        name = file.split('.txt')[0].split('/')[-1]
        if name in names_set:
            site[name] = create_link(name)
            print(name)

    files = {}

    for fn in util.get_filenames('../data/documents'):
        with open(fn) as file:
            text = util.cut_away_sources(file.read())
            files[fn.name.split('.txt')[0]] = text

    occ = collections.defaultdict(list)

    # search for occurrences of people in all documents
    for name in names_set:
        for page_name, text in files.items():
            if name in text:
                occ[name].append(create_link(page_name))
    print(occ)

    merged = {}

    print(site)
    print(site.keys())

    # # take filenames as special interest and search in all files
    # remaining_files = [f for f in files.keys() if f not in occ.keys()]

    # print(remaining_files)

    # for name in remaining_files:
    #     for page_name, text in files.items():
    #         if name in text:
    #             occ[name].append(create_link(page_name))

    # create final data object
    for name, pages in occ.items():
        if name in site.keys():
            # remove profile from other pages
            if site[name] in pages:
                pages.remove(site[name])
            merged[name] = {'pages': pages, 'profile': site[name]}
        else:
            merged[name] = {'pages': pages}

    with open('../extension/data.js', 'w') as outfile:
        json.dump(merged, outfile, ensure_ascii=False)
    with open('../extension/data.js', 'r+') as file:
        text_json = file.read()
        file.seek(0)
        file.write('var data = ' + text_json + ';')
        file.truncate()
def get_gaba_data_dict(filelist):
    """Returns a mapping from GABA conductance to TraubData objects"""
    ret = defaultdict(list)
    for fname in get_filenames(filelist):
        print fname, makepath(fname)
        data = TraubData(makepath(fname))
        gaba = dict(data.fdata['/runconfig/GABA'])
        ret[float(gaba['conductance_scale'])].append(data)

    return ret
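A hedged usage sketch for the mapping returned above (the file-list name is a placeholder): each key is a GABA conductance scale and each value is the list of TraubData objects recorded at that scale.

# Hypothetical usage of get_gaba_data_dict; 'gaba_filelist.csv' is a placeholder name.
gaba_data = get_gaba_data_dict('gaba_filelist.csv')
for scale in sorted(gaba_data):
    print('GABA conductance scale {}: {} simulations'.format(scale, len(gaba_data[scale])))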
Example #11
def main():
    filenames = get_filenames()
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        "--input",
                        type=int,
                        choices=range(len(filenames)),
                        help="index for single input image")
    parser.add_argument("-t",
                        "--tmin",
                        type=float,
                        default=0.2,
                        help="minimum transmission rate")
    parser.add_argument("-A",
                        "--Amax",
                        type=int,
                        default=220,
                        help="maximum atmosphere light")
    parser.add_argument("-w",
                        "--window",
                        type=int,
                        default=15,
                        help="window size of dark channel")
    parser.add_argument("-r",
                        "--radius",
                        type=int,
                        default=40,
                        help="radius of guided filter")

    args = parser.parse_args()

    if args.input is not None:
        src, dest = filenames[args.input]
        dest = dest.replace(
            "%s", "%s-%d-%d-%d-%d" %
            ("%s", args.tmin * 100, args.Amax, args.window, args.radius))
        generate_results(
            src, dest,
            partial(dehaze,
                    tmin=args.tmin,
                    Amax=args.Amax,
                    w=args.window,
                    r=args.radius))
    else:
        for idx in SP_IDX:
            src, dest = filenames[idx]
            for param in SP_PARAMS:
                newdest = dest.replace(
                    "%s",
                    "%s-%d-%d-%d-%d" % ("%s", param['tmin'] * 100,
                                        param['Amax'], param['w'], param['r']))
                generate_results(src, newdest, partial(dehaze, **param))

        for src, dest in filenames:
            generate_results(src, dest, dehaze)
def dump_ss_fraction_peaks(flistfilename,
                           trange=(2, 20),
                           cutoff=0.2,
                           binsize=5e-3,
                           lookahead=10):
    """Plot the peaks in fraction of spiny stellate cells over multiple
    simulations."""
    #data_dict = get_gaba_data_dict(flistfilename)
    peak_frac_med = defaultdict(list)
    peak_frac_mean = defaultdict(list)
    iqr_dict = defaultdict(list)
    with open(
            'gaba_scale_ss_frac_cutoff_{}_binwidth_{}ms_lookahead_{}.csv'.
            format(cutoff, binsize * 1000, lookahead), 'wb') as fd:
        writer = csv.writer(fd,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(
            ('filename', 'gabascale', 'frac_mean', 'frac_med', 'frac_iqr'))
        for fname in get_filenames(flistfilename):
            data = TraubData(makepath(fname))
            gaba = dict(data.fdata['/runconfig/GABA'])
            scale = gaba['conductance_scale']
            print fname, gaba
            hist, bins = data.get_spiking_cell_hist('SpinyStellate',
                                                    timerange=trange,
                                                    binsize=binsize,
                                                    frac=True)
            peaks, troughs = peakdetect(hist, bins[:-1], lookahead=lookahead)
            if len(peaks) == 0:
                print 'No peaks for', data.fdata.filename
                writer.writerow((fname, scale, '', '', ''))
                continue
            x, y = zip(*peaks)
            x = np.asarray(x)
            y = np.asarray(y)
            idx = np.flatnonzero(y > cutoff)
            frac_med = ''
            frac_mean = ''
            iqr = ''
            if len(idx) > 0:
                frac_med = np.median(y[idx])
                frac_mean = np.mean(y[idx])
                iqr = np.diff(np.percentile(y[idx], [25, 75]))
                if len(iqr) > 0:
                    iqr = iqr[0]
                else:
                    iqr = ''
            peak_frac_med[scale].append(frac_med)
            peak_frac_mean[scale].append(frac_mean)
            iqr_dict[scale].append(iqr)
            writer.writerow((fname, scale, frac_mean, frac_med, iqr))
    return peak_frac_mean, peak_frac_med, iqr_dict
Example #13
def plot_vm_fft_multifile(flist, figfile, trange=(2,20)):
    """Combine the Vm from cells of each celltype in all simulations and
    plot FT for each celltype. Since FT is linear, we can just sum the
    FT of individual simulations to get that of the sum of the Vms

    """
    celltype_ft_dict = {}
    freq_comp = None
    for fname in get_filenames(flist):
        result = population_vm_spectrum(fname,  trange)
        result.pop('TCR')
        for celltype, (freq, ft) in result.items():
            if freq_comp is not None:
                np.testing.assert_array_equal(freq_comp, freq)
            else:
                freq_comp = freq
            if celltype in celltype_ft_dict:
                celltype_ft_dict[celltype] += ft
            else:
                celltype_ft_dict[celltype] = ft
    fig = plt.figure()
    ax = None
    # major_formatter = plt.FormatStrFormatter('%1.1g')
    for ii, celltype in enumerate(CELLTYPES):
        ax = fig.add_subplot(len(celltype_ft_dict), 1, ii+1, sharex=ax)
        ft = celltype_ft_dict[celltype]
        idx = (freq_comp > 0) & (freq_comp < 100)
        
        ax.plot(freq_comp[idx], ft[idx], color=cellcolor[celltype],
                label=celltype)
        # print ii, celltype
        # ax.legend()
        # ymax = ax.get_ylim()[1]
        # p10 = np.log10(ymax)
        # ymax = np.ceil(ymax / 10**int(p10)) * 10**int(p10) 
        # ax.set_ylim((0, ymax))
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)        
        ax.yaxis.tick_left()
        ax.xaxis.tick_bottom()
        # ax.yaxis.set_major_formatter(major_formatter)
        # ax.set_yticks(ax.get_ylim())
        # ax.xaxis.set_visible(False)
        # plt.setp(ax, frame_on=False)
    # ax.set_visible(True)
    # ax.get_xaxis().set_visible(True)
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('power')
    #ax.set_yticks([-15, 0, 15])
    ax.tick_params(axis='x', which='major', labelsize=11)
    plt.tight_layout()
    plt.savefig(figfile)
    plt.show()
Example #14
def total_length(in_dir):
    """Calculate the total length of all files(in seconds)."""
    filenames = []
    for ext in SUPPORTED_FORMAT:
        filenames.extend(util.get_filenames(in_dir, extention=ext))
    print(filenames)
    length = 0
    for fname in filenames:
        fpath = os.path.join(in_dir, fname)
        mas = MyAudioSegment(fpath)
        length += mas.length_in_seconds
        print('Length of {} is {}s'.format(fpath, mas.length_in_seconds))
    return length
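A brief usage sketch with a placeholder directory name:

# Hypothetical call; 'recordings/' stands in for a real input directory.
total_seconds = total_length('recordings/')
print('Total audio length: {:.1f} s'.format(total_seconds))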
Example #15
def semengt_by_step(in_dir, out_dir, time_step, offset=0):
    """Segment by time steps(in seconds)."""
    filenames = []
    for ext in SUPPORTED_FORMAT:
        filenames.extend(util.get_filenames(in_dir, extention=ext))
    print(filenames)
    N = 0
    for fname in filenames:
        fpath = os.path.join(in_dir, fname)
        mas = MyAudioSegment(fpath, verbose=True)
        n = mas.segment_second_step(out_dir, time_step, offset)
        N += n
    return N, len(filenames)
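A brief usage sketch, again with placeholder directories, cutting every supported file into 10-second chunks:

# Hypothetical call; directory names are placeholders.
n_segments, n_files = semengt_by_step('recordings/', 'segments/', time_step=10, offset=0)
print('Wrote {} segments from {} files'.format(n_segments, n_files))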
Example #16
def main():
    files = get_filenames()
    x, y = [], []

    # generate the learning curve data
    data = list(parse(file(files.dataset)))
    for train_prop in np.arange(0.1, 0.99, 0.05):
        training_set, testing_set = sample(data, train_prop)
        tree = build_tree(training_set).prune(MIN_GAIN)
        check = [record[RESULT_IDX] == plurality(tree.classify(record))
                 for record in testing_set]
        counter = Counter(check)
        precision = counter[True] / float(counter[True] + counter[False])
        print 'Training set sampling probability = %.2f:' % (train_prop)
        print 'training data size = %d,' % (len(training_set)),
        print 'test data size = %d,' % (len(testing_set)),
        print 'precision = %.4f' % (precision)
        x.append(len(training_set))
        y.append(precision)

    # statistics
    ymean, ystd, ymin, ymax = np.mean(y), np.std(y), np.min(y), np.max(y)
    print 'Mean of precision = %.4f' % (ymean)
    print 'Standard deviation of precision = %.4f' % (ystd)
    print 'Min = %.4f, max = %.4f' % (ymin, ymax)
    xy = sorted(zip(x, y), key=lambda a: a[0])
    x, y = zip(*xy)

    # setup decorations
    plt.rc('font', family='serif')
    plt.yticks(np.arange(0.0, 1.0, 0.1))
    plt.ylim(0.0, 1.0)
    plt.grid(True)
    plt.title('Learning Curve')
    plt.xlabel('Training set size')
    plt.ylabel('Precision on test set')

    # plot smoothed learning curve
    xnew = np.linspace(np.min(x), np.max(x), 100)
    ynew = interp1d(x, y)(xnew)
    plt.plot(x, y, '.', xnew, ynew, '--')

    # annotation
    box = dict(boxstyle='square', fc="w", ec="k")
    txt = '$\mu = %.4f$, $\sigma = %.4f$' % (ymean, ystd)
    txt += ', $min = %.4f$, $max = %.4f$' % (ymin, ymax)
    plt.text(170, 0.05, txt, bbox=box)

    plt.savefig(files.curve)
    print 'Save learning curve to', files.curve
def multifile_firing_rate_distribution(flist='unconnected_network.csv',
                                       figfilename='Figure_3B.svg',
                                       trange=(2, 20)):
    """Plots histograms showing distribution of firing rates among cells
    of each type collected from multiple simulations.

    """
    start = trange[0]
    end = trange[1]
    rates = defaultdict(list)
    for fname in get_filenames(flist):
        data = TraubData(makepath(fname))
        if data.simtime < end:
            end = data.simtime
        for celltype in CELLTYPES:
            for cell, spiketrain in data.spikes.items():
                if cell.startswith(celltype):
                    rate = 1.0 * np.count_nonzero((spiketrain > start) & (
                        spiketrain < end)) / (end - start)
                    rates[celltype].append(rate)
    bins = np.arange(0, 61.0, 5.0)
    hists = {}
    prev = np.zeros(len(bins) - 1)
    ax = None
    for ii, celltype in enumerate(CELLTYPES):
        ctype_rates = rates[celltype]
        ax = plt.subplot(len(rates), 1, ii + 1, sharex=ax, sharey=ax)
        h, b = np.histogram(ctype_rates, bins=bins)
        h = np.asarray(h, dtype='float64') / len(ctype_rates)
        x = bins[:-1]
        plt.bar(x, h, color=cellcolor[celltype], width=(bins[1] - bins[0]))
        # bottom = prev, color=cellcolor[celltype], label=celltype)
        prev += h
        ax.tick_params(axis='y', right=False, left=False)
        # plt.setp(ax, frame_on=False)
        ax.tick_params(axis='x', top=False, bottom=True)
        ax.spines['bottom'].set_color((0, 0, 0, 0))
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        # ax.xaxis.set_visible(False)
    ax.xaxis.set_visible(True)
    ax.set_xticks(bins[::2])
    ax.set_yticks([0, 1.0])
    ax.tick_params(axis='y', left=True)
    plt.xlabel('Firing rate (Hz)')
    plt.tight_layout()
    plt.savefig(figfilename)
    plt.show()
Example #19
def semengt_by_label(in_dir, out_dir, label_dir, offset=0):
    """Segment audio by time interval(in seconds)."""
    filenames = []
    for ext in SUPPORTED_FORMAT:
        filenames.extend(util.get_filenames(in_dir, extention=ext))
    print(filenames)
    N = 0
    for fname in filenames:
        fpath = os.path.join(in_dir, fname)
        mas = MyAudioSegment(fpath)
        label_filename = mas.name + '.txt'
        label_path = os.path.join(label_dir, label_filename)
        if not util.is_exists(label_path): continue
        n = mas.segment_with_label_file(label_path, out_dir, offset)
        N += n
    return N, len(filenames)
Example #20
def overlay_unconnected_fft(flist, figfile, trange=(2,20)):
    fig = plt.figure()
    # major_formatter = plt.FormatStrFormatter('%1.3g')
    celltype_ax_dict = {}
    for fname in get_filenames(flist):
        result = population_vm_spectrum(fname, trange)
        result.pop('TCR')
        nax = len(result)
        for ii, celltype in enumerate(CELLTYPES):
            ax = celltype_ax_dict.get(celltype, None)
            if ax is None:
                ax = fig.add_subplot(nax, 1, ii+1, sharex=ax)
                celltype_ax_dict[celltype] = ax
            freq, ft = result[celltype]
            idx = (freq > 1) & (freq < 100)
            ax.plot(freq[idx], ft[idx], # color=cellcolor[celltype],
                    label=celltype, alpha=0.5)
    plt.show()
Example #22
def plot_frequency_distribution(flist, figfile, trange=(2,20),
                                freqbins=[0, 4, 10, 20, 40, 80,]):
    """Plot the distribution of various frequency peaks over multiple
    simulations of the unconnected network.

    Divide the range of frequencies into freqbins and then plot
    histogram of FT values in those.

    """
    celltype_freq_dict = defaultdict(dict)
    
    for fname in get_filenames(flist):
        result = population_vm_spectrum(fname, trange)
        result.pop('TCR')
        nax = len(result)
        for celltype, (freq, ft) in result.items():
            for ii, start in enumerate(freqbins):
                ft_list = celltype_freq_dict[celltype].get(start, [])
                print '$$', ii, freqbins[ii]
                if ii < len(freqbins) - 1:
                    idx = (freq >= start) & (freq < freqbins[ii+1])
                else:
                    idx = freq >= start
                nonzero = ft[idx][ft[idx] > 0]
                ft_list += list(nonzero)
                celltype_freq_dict[celltype][start] = ft_list
    fig = plt.figure()
    ax = None
    axno = 0
    for ii, celltype in enumerate(CELLTYPES):
        for jj, start in enumerate(freqbins):            
            axno += 1
            print '##', celltype, start, axno
            ax = fig.add_subplot(len(celltype_freq_dict), len(freqbins), axno)
            ax.hist(celltype_freq_dict[celltype][start], log=True)
            ax.set_xlabel('{}-{}'.format(start, 'inf' if jj == len(freqbins)-1 else freqbins[jj+1]))
            if jj == 0:
                ax.set_ylabel(celltype)
    # plt.tight_layout()
    plt.savefig(figfile)
    plt.show()
def main():
    filenames = get_filenames()
    parser = argparse.ArgumentParser(
        description=
        'Underwater Image Restoration by Blue-Green Channels Dehazing and Red Channel Correction'
    )
    parser.add_argument(
        "-i",
        "--input",
        type=int,
        choices=range(len(filenames)),
        help="index for single input image: {} corresponds to indexes {}".
        format(filenames[0][0], list(range(len(filenames)))))
    parser.add_argument("-w",
                        "--window",
                        type=int,
                        default=15,
                        help="window size of dark channel")
    args = parser.parse_args()

    src, dest = filenames[args.input]
    generate_results(src, dest, partial(adaptiveExp_map, w=args.window))
Example #24
def main():
    files = get_filenames()
    train_data = json.load(file(files.train))
    test_data = json.load(file(files.test))

    print 'Before pruning:'
    tree = build_tree(train_data)
    check = [record[RESULT_IDX] == plurality(tree.classify(record))
             for record in test_data]
    tree.to_image().save(files.tree)
    print Counter(check)
    print 'Saved tree plot to', files.tree
    print '----------------------'

    print 'After pruning:'
    tree.prune(0.1)
    check = [record[RESULT_IDX] == plurality(tree.classify(record))
             for record in test_data]
    tree.to_image().save(files.pruned_tree)
    print Counter(check)
    print 'Saved pruned tree plot to', files.pruned_tree
    print '----------------------'
Example #26
    config = get_config(sys.argv[1])
    # experiment = Experiment("wXwnV8LZOtVfxqnRxr65Lv7C2")
    comet_dir_path = os.path.join(config["result_directory"], config["model"])
    makedirs(comet_dir_path)
    experiment = OfflineExperiment(
        project_name="DeepGenomics",
        offline_directory=comet_dir_path)
    experiment.log_parameters(config)
    if torch.cuda.is_available():
        # torch.cuda.set_device(str(os.environ["CUDA_VISIBLE_DEVICES"]))
        device = torch.device('cuda:{}'.format(os.environ["CUDA_VISIBLE_DEVICES"]))
    else:
        device = torch.device('cpu')
    print(device)
    number_of_examples = len(get_filenames(os.path.join(config["data"], "x")))
    list_ids = [str(i) for i in range(number_of_examples)]
    random.shuffle(list_ids)
    t_ind, v_ind = round(number_of_examples * 0.7), round(number_of_examples * 0.9)
    train_indices, validation_indices, test_indices = list_ids[:t_ind], list_ids[t_ind:v_ind], list_ids[v_ind:]
    
    params = {'batch_size': config["training"]["batch_size"],
              'shuffle': config["training"]["shuffle"],
              'num_workers': config["training"]["num_workers"]}
    
    # Build generators
    training_set = Dataset(config["data"], train_indices)
    training_generator = data.DataLoader(training_set, **params)
    
    validation_set = Dataset(config["data"], validation_indices)
    validation_generator = data.DataLoader(validation_set, **params)
Example #27
def find_names():
    fns = util.get_filenames('../data/res2')
    all_names = set()
    for fn in fns:
        print(fn.name)
        with open(f'../data/res2/{fn.name}') as f:
            text = f.read()
        lines = text.split('\n')
        lines = [l.split(' ') for l in lines if ' ' in l]

        res = []

        index = 0
        while index < len(lines):
            tok, tag = lines[index]
            next_possible = index + 1 < len(lines)

            # find special cases
            if tag == 'B-PERparg' and next_possible:
                tok_n, tag_n = lines[index + 1]
                if tag_n == 'S-PER':
                    res.append(tok + ' ' + tok_n)
                    index += 1  # skip

            # find all PER consisting of at least 2 token
            if tag == 'B-PER' and next_possible:
                full_word = [tok]
                end_index = index + 1
                while end_index < len(lines):
                    tok_n, tag_n = lines[end_index]
                    if tag_n == 'I-PER':
                        full_word.append(tok_n)
                        end_index += 1
                    elif tag_n == 'E-PER':
                        full_word.append(tok_n)
                        res.append(' '.join(full_word))
                        break
                    else:
                        # ignore all other tokens
                        break
            index += 1

        # filter out useless stuff
        res = [
            r.replace('a.D.', '').replace('Katzemich/', 'Katzemich').replace(
                '/CSU',
                '').replace('/FDP', '').replace('/Hamburgisches', '').replace(
                    ').Darüber',
                    '').replace(')August', '').replace('AG.', '').replace(
                        '(CDU', '').replace('Prof.', '').replace(
                            ':\"Überraschenderweise',
                            '').replace('/CDU',
                                        '').replace('informieren.\"',
                                                    '').strip() for r in res
            if r not in blocklist and 'stiftung' not in r.lower()
            and 'von hayek' not in r.lower()
        ]
        res = [r for r in res if not r.startswith('Dr. ')]
        all_names.update(res)

    # remove entries that are consumed by other entries
    final = []
    for na in all_names:
        skip = False
        for na2 in all_names:
            if na == na2:
                continue
            if na.lower() in na2.lower() and na.lower() + 's' != na2.lower():
                skip = True
                break
            # remove e.g. 'Helmut Kohls' and keep 'Helmut Kohl'
            if na.lower() == na2.lower() + 's':
                skip = True
                break
        if not skip:
            final.append(na)

    return set(final)
Example #28
    # path_img = BASE_PATH+'/'+paths[0]
    # image = skimage.io.imread(path_img)
    # image = image.astype('float')/255
    # filter_responses = visual_words.extract_filter_responses(image)
    # util.display_filter_responses(filter_responses)
    # dictionary = np.load('../results/train_dictionary.npy')

    # for path in paths:
    #     image = skimage.io.imread(BASE_PATH+'/'+path)
    #     image = image.astype('float')/255
    #     wordmap = visual_words.get_visual_words(image,dictionary)
    #     #print(wordmap)
    #     util.save_wordmap(wordmap, os.path.basename(path))

    num_cores = multiprocessing.cpu_count()
    x, y = util.get_filenames(BASE_PATH)
    X_train, X_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=10)
    print(len(X_train))
    alpha = 50  # number of pixels per image
    K = 100  # number of cluster centers
    visual_words.compute_dictionary(X_train, RESULTS_PATH, K, alpha, 8)
    files = os.listdir(RESULTS_PATH)
    filter_responses = []
    for file in files:
        filter_responses.append(np.load(os.path.join(RESULTS_PATH, file)))
    filter_responses = np.concatenate(filter_responses, axis=0)
    kmeans = sklearn.cluster.KMeans(n_clusters=K,
                                    n_jobs=-1).fit(filter_responses)
Example #29
                            curInGameCount += -1
            saved_frame[:] = []
            last_check = save_count
            #clear folder
            clear_directory(path_out + "/temp")

        if (until != -1 and save_count == until):
            break
        frame_number += 1
    clear_directory(path_out + "/temp")
    result_file.close()
    vidcap.release()
    print(video_name + " Done!")


import util

if __name__ == "__main__":
    full_raw_videos = util.get_filenames(FULL_RAW_PATH)
    for index, full_raw_video in enumerate(full_raw_videos):
        video_no_ext = full_raw_video.replace('.mp4', '')
        print(f"[Start {index+1}/{len(full_raw_videos)}] " + video_no_ext)
        FindTransitions(FULL_RAW_PATH, full_raw_video,
                        CLASSIFIER_PATH + "inference_result")

        util.postprocess_timestamp('./inference_result/' + video_no_ext)

        util.cutVideo(video_no_ext, CLASSIFIER_PATH + 'inference_result/',
                      FULL_RAW_PATH, CLASSIFIER_PATH + 'full_video/')
        print(f"[End {index+1}/{len(full_raw_videos)}] " + video_no_ext)
Example #30
def main():
    filenames = get_filenames()
    for src, dest in filenames:
        generate_results(src, dest, dehaze)
Example #31
    parser.add_option('--mean_file', type='string')
    parser.add_option('--W', type='string', help='file to store W')
    parser.add_option('--t', type='int', help='the number of eigenvalues')
    parser.add_option('--mean_shape', type='string')
    parser.add_option('--debug', action="store_true", default=False)
    parser.add_option('--method', type='choice', default='mean_pca',
                      choices=['max', 'mean', 'max_pca', 'mean_pca', 'all'])
    (options, args) = parser.parse_args()
    return options

p = init()
if __name__ == '__main__':
    caffe.set_mode_gpu()
    m = Alignment(p.prototxt, p.model, p.layername, p.mean_file, p.W, p.t,
                  p.mean_shape)
    filenames = get_filenames(p.filelists)
    if p.method == 'all':
        shapes_max = []
        shapes_max_pca = []
        shapes_mean = []
        shapes_mean_pca = []
        for filename in filenames:
            print filename
            img = caffe.io.load_image(p.root + filename)
            shapes = m.process_all(img)
            shapes_max.append(shapes[0])
            shapes_max_pca.append(shapes[1])
            shapes_mean.append(shapes[2])
            shapes_mean_pca.append(shapes[3])
        dump(np.array(shapes_max), filenames, p.outpath + '_max')
        dump(np.array(shapes_max_pca), filenames, p.outpath + '_max_pca')
Example #33
import sys

sys.path.append("../wrangle")

import util

from flair.data import Sentence
from flair.models import SequenceTagger

fs = util.get_filenames()
fs = [f for f in fs if f.name == 'Astroturfing.txt']
tagger = SequenceTagger.load('de-ner-germeval')
# fs = [f for f in fs if f.name.lower() == 'Bund Katholischer Unternehmer.txt'.lower()]
for f in fs:
    doc = util.preprocess(
        f,
        '/home/filter/spacy-data/de_core_news_sm-2.0.0/de_core_news_sm/de_core_news_sm-2.0.0'
    )
    sents = [Sentence(' '.join([str(t) for t in s])) for s in doc.sents]
    tagger.predict(sents, mini_batch_size=64)
    res = []
    for sentence in sents:
        for token in sentence:
            if token.tags['ner'] != '' and token.tags['ner'] != 'O':
                res.append(token.text + ' ' + token.tags['ner'])
    print(sents)
    print(res)
    with open('../data/res2/' + f.name, 'w') as outfile:
        outfile.write('\n'.join(res))