Example #1
def get_total_coding_rgn(features):
    """get total fraction of genome that is covered by coding region
    Not currently used in EGRIN2 pipeline but can be used to assess
    validity of cis-regulatory motifs."""
    #  Get coding regions from features file (e.g. Escherichia_coli_K12_features)
    with open(features, 'r') as f:
        skip = 1
        line = f.readline()
        while 'header' not in line:
            line = f.readline()
            skip += 1

    features = pd.read_table(features, skiprows=skip)
    genome_len = int(features.iloc[0].end_pos)
    features = features[ features.type != 'SEQ_END' ]

    start_pos = npstr.replace(features.start_pos.values.astype(str),'<','').astype(int)
    end_pos = npstr.replace(features.end_pos.values.astype(str),'>','').astype(int)

    # tbd: use bitarray? (package bitarray https://pypi.python.org/pypi/bitarray)
    hits = np.zeros(genome_len + 1, dtype=bool)
    for i in range(len(start_pos)):
        hits[start_pos[i]:end_pos[i]] = True

    return np.mean(hits)
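A note on the aliases: npstr above, like np_f, npc, char, nd, ch and defCharArr in the examples below, is an import alias for numpy.core.defchararray (two of the snippets import it explicitly), whose replace() applies str.replace element-wise over an array. A minimal, self-contained sketch of the pattern above, with invented coordinates:

import numpy as np
import numpy.core.defchararray as npstr

# toy start/end positions carrying the '<'/'>' markers stripped above
start_raw = np.array(['<190', '337', '2801'])
end_raw = np.array(['255', '>2799', '3733'])

start_pos = npstr.replace(start_raw, '<', '').astype(int)
end_pos = npstr.replace(end_raw, '>', '').astype(int)

# mark covered bases in a boolean array; its mean is the covered fraction
hits = np.zeros(4000 + 1, dtype=bool)
for s, e in zip(start_pos, end_pos):
    hits[s:e] = True
print(np.mean(hits))  # fraction of the toy genome inside a feature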
Example #2
def get_x(file):
    # read the train-x file
    train_x = np.genfromtxt(file, dtype='str', delimiter=',')
    train_x = npy_dy.replace(train_x, 'M', '0.25')
    train_x = npy_dy.replace(train_x, 'F', '0.5')
    train_x = npy_dy.replace(train_x, 'I', '0.75')
    return train_x.astype(float)
Example #3
def create_graph(file_path, resistor, y_axis_max):
    fig, ax = plt.subplots()

    file_name = os.path.basename(file_path).split('.')[0]
    data = np.genfromtxt(file_path, delimiter=',', dtype=str, skip_header=7)
    # 4 columns means a dry test, 5 columns means a wet test
    is_wet_test = data.shape[1] == 5
    if is_wet_test:
        data = data[:, 2:]  # remove first two columns (time string can't be converted to float)
        # the csv files have double quotes for some reason - these need to be removed
        data = np_f.replace(data, '"', '')
        data = data.astype(float)  # convert remaining data to float
        sp_height = data[:, 0]
        ls_volts = data[:, 1]
        ps_volts = data[:, 2]
        sp_height_rel = sp_height[0] - sp_height
        ls_ohms = ls_volts / ((ps_volts - ls_volts) / float(resistor))

        ax.set_xlim([0, sp_height_rel.max()])
        ax.set_ylim([0, float(y_axis_max)])
        ax.plot(sp_height_rel, ls_ohms, linewidth=0.1)
        ax.set_xlabel('Height (mm)', fontsize=7)
        ax.set_ylabel('LS Resistance (ohms)', fontsize=7)

        start, end = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(0, end, 10))

    else:  # dry test
        data = data[:, 1:]
        # the csv files have double quotes for some reason - these need to be removed
        data = np_f.replace(data, '"', '')
        data = data.astype(float)  # convert remaining data to float
        ls_time = data[:, 0]
        ls_volts = data[:, 1]
        ps_volts = data[:, 2]
        ls_ohms = ls_volts / ((ps_volts - ls_volts) / float(resistor))

        ax.set_xlim([0, ls_time.max()])
        ax.set_ylim([0, float(y_axis_max)])
        ax.plot(ls_time, ls_ohms, linewidth=0.1)
        ax.set_xlabel('Time (sec)', fontsize=7)
        ax.set_ylabel('LS Resistance (ohms)', fontsize=7)

        start, end = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(0, end, 1))

    ax.tick_params(labelsize=5)
    ax.set_title(file_name, fontsize=7)
    ax.grid(linewidth=0.1)

    start, end = ax.get_ylim()
    ax.yaxis.set_ticks(np.arange(0, end, 50))

    #fig.savefig("test.png")
    #plt.show()
    make_pdf(file_path)
Example #4
def mutasi(individu):
    for i in range(2):
        random_index = randint(0, 6)
        ori = individu[random_index]
        mutan = npc.replace(ori, "0", "2")
        mutan = npc.replace(mutan, "1", "0")
        mutan = npc.replace(mutan, "2", "1")
        individu[random_index] = mutan
    return individu
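The three chained replace() calls above flip every bit of the chromosome string by routing through a sentinel: parking '0' on '2' first keeps the original zeros separate from the ones that are about to become '0'. A small sketch of the trick, assuming npc aliases numpy.core.defchararray as in the example:

import numpy.core.defchararray as npc

gene = '0110101'
step1 = npc.replace(gene, '0', '2')   # '2112121': zeros parked on the sentinel
step2 = npc.replace(step1, '1', '0')  # '2002020': ones become zeros
step3 = npc.replace(step2, '2', '1')  # '1001010': sentinels become ones
print(step3)  # every bit flipped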
Example #5
def img_dirs(resource, dir_) -> np.ndarray:
    dumped = join(DUMP_DIR, "img_dirs_{}.npy".format(resource))
    if exists(dumped):
        return np.load(dumped)
    if resource == "jpg":
        pathname = dir_+"*/*/*.jpg"
        if sys.platform == "win32":
            pathname = pathname.replace("\\", "/")
        dirs = glob(pathname=pathname)
        dirs = np.array(dirs)
        # dirs.dump(dumped)

    elif resource == "csv":
        pathname = dir_+"*/*images*.csv"
        if sys.platform == "win32":
            pathname = pathname.replace("\\", "/")
        dirs = glob(pathname=pathname)
        dfs = np.array(['image_name', 'image_url'])
        for c in dirs:
            dfs = np.append(dfs, genfromtxt(c, dtype=str, delimiter=",")[1:])
        dirs = dfs.reshape((-1, 2))[1:]
        dirs[:, 0] = char.replace(dirs[:, 0], ".jpg", "")
        # dirs.dump(dumped)
    else:
        raise FileNotFoundError("can't find {} images".format(resource))
    return dirs
Example #6
def read_pandas_txt(data_dir, train=True):

    data_df = pd.read_csv(data_dir, header=None)  #os.path.join(os.getcwd(),
    temp_ = np.asarray([str(x[0]).split(" ") for x in data_df.values])
    bin_temp_ = np_f.replace(temp_, ['semantic'], ['binary'])
    ins_temp_ = np_f.replace(bin_temp_, ['binary'], ['instance'])
    print(bin_temp_, ins_temp_)
    print("TOTAL NUMBERS:", temp_.shape)
    if train:
        X = temp_[:, 0]
        y = bin_temp_[:, 1]
        y_ins = ins_temp_[:, 1]
        return np.array(X), np.array(y), np.array(y_ins)  #,coords
    else:
        X = temp_[:, 0]
        return np.array(X)
Example #7
def load_source_rows(tab, names, key='assoc'):
    """Load the rows from a table that match a source name.

    Parameters
    ----------
    tab : `astropy.table.Table`
       Table that will be searched.

    names : list
       List of source identifiers.

    key : str
       Name of the table column that will be searched for a source
       matching key.

    Returns
    -------
    outtab : `astropy.table.Table`
       Table containing the subset of rows with matching source identifiers.

    """
    names = [name.lower().replace(' ', '') for name in names]
    col = tab[[key]].copy()

    col[key] = defchararray.replace(defchararray.lower(col[key]), ' ', '')
    mask = create_mask(col, {key: names})
    return tab[mask]
Example #8
def stats(data):
    # each transposed row holds a variable name followed by its values
    dict_of_lists = defaultdict(list)
    data = np.array(data)
    data_tran = data.T
    for row in data_tran:
        values = np_f.replace(np.asarray(row[1:]), 'NA', '0')
        dict_of_lists[row[0]] = values.astype(float)
    list_of_dicts = dict_of_lists

    max_list = []
    min_list = []
    mean_list = []
    median_list = []
    var = []

    for name in list_of_dicts.keys():
        max_list.append(max(list_of_dicts[name]))
        min_list.append(min(list_of_dicts[name]))
        mean_list.append(np.mean(list_of_dicts[name]))
        median_list.append(np.median(list_of_dicts[name]))
        var.append(name)

    return max_list, min_list, mean_list, median_list, var
Example #9
def plot_pie_difficulty(filename, data):
    
    # Pie chart showing the distribution of difficulty answers
    data = np.array(COLUMN_DIFFICULTY_ITENS)[data - 1]
    labels, counts_elements = np.unique(data, return_counts=True)
    slices = np.round(counts_elements / len(data) * 100, 1)

    # Convert the labels to strings and escape the $ character (latex)
    labels_str = labels.astype(str)
    labels_str = np_f.replace(labels_str, '$', r'\$')
    
    patches, texts, autotexts = plt.pie(slices, labels=labels_str, autopct='%.1f%%', startangle=90, counterclock=False)

    plt.setp(autotexts, size=16, weight="bold") 
    plt.axis('equal')

    for t in texts:
        t.set_size('large')
    for t in autotexts:
        t.set_size('large')

    plt.savefig(filename, bbox_inches='tight', dpi=400)    
    plt.show()
    plt.close()
    
    return slices, labels
Example #10
def remove_cols(data, skip_cols):
    conv = []
    colnr = 0
    for col in data:
        if colnr % 200 == 0:
            print('processing column {0:d}...'.format(colnr))
            gc.collect()
        if colnr not in skip_cols:
            col = strip(col, '"')
            col = replace(col, '', '0')
            col = replace(col, 'NA', '0')
            col = replace(col, 'false', '0')
            col = replace(col, 'true', '1')
            conv.append(col.astype(int16))
        colnr += 1
    gc.collect()
    return array(conv)
Example #12
def plot_pie(filename, data1, data2):

    fig, axes = plt.subplots(nrows=1, ncols=2)

    # Pie chart (left) showing how many players are interested in playing
    length = len(data1)
    py = round(
        np.count_nonzero(data1 == COLUMN_PAY_ITENS[0]) / length * 100, 0)
    pn = round(
        np.count_nonzero(data1 == COLUMN_PAY_ITENS[1]) / length * 100, 0)

    patches, texts, autotexts = axes[0].pie([py, pn],
                                            labels=COLUMN_PLAY_ITENS,
                                            autopct='%.1f%%',
                                            startangle=90,
                                            counterclock=False)
    axes[0].set_title(COLUMN_PLAY, fontsize=8)
    axes[0].axis('equal')

    plt.setp(autotexts, size=12, weight="bold")

    for t in texts:
        t.set_size('x-small')
    for t in autotexts:
        t.set_size('x-small')

    # Pie chart (right) showing how much they would pay for the game
    labels, counts_elements = np.unique(data2, return_counts=True)
    slices = np.round(counts_elements / length * 100, 1)

    # Convert the labels to strings and escape the $ character (latex)
    labels_str = labels.astype(str)
    labels_str = np_f.replace(labels_str, '$', r'\$')

    patches, texts, autotexts = axes[1].pie(slices,
                                            labels=labels_str,
                                            autopct='%.1f%%',
                                            startangle=90,
                                            counterclock=False)
    axes[1].set_title(COLUMN_PAY, fontsize=8)
    axes[1].axis('equal')

    plt.setp(autotexts, size=8, weight="bold")

    for t in texts:
        t.set_size('x-small')
    for t in autotexts:
        t.set_size('x-small')

    plt.axis('equal')
    plt.subplots_adjust(wspace=0.75)
    plt.savefig(filename + '_pie.png', bbox_inches='tight', dpi=400)
    plt.show()
    plt.close()

    return py, pn, slices, labels
Example #13
    def get_filenames(self, file_ext='.xml'):
        """ Helper function which gets the filename identifiers (exluding the file extension) from a directory

                Args:
                    path_to_dataset (string): Path to directory with the files
                    file_ext (string): File extension to be spliced out of filename
                Returns:
                    ndarray with the files identifiers
        """
        files = os.listdir(self.path_to_annotations)
        arr = np.array(files)
        result = replace(arr, file_ext, '')

        return result
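One caveat with this pattern: replace() removes the extension substring wherever it occurs, not only at the end, so a name like 'a.xml.backup.xml' loses both occurrences. A sketch of a suffix-safe alternative using os.path.splitext (sample names invented):

import os
import numpy as np

files = ['scan_001.xml', 'scan_002.xml', 'a.xml.backup.xml']
# splitext strips only the final extension, unlike a global replace
result = np.array([os.path.splitext(f)[0] for f in files])
print(result)  # ['scan_001' 'scan_002' 'a.xml.backup']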
Example #14
def to_ints_only(data):
    conv = []
    failed = []
    colnr = 0
    for col in data:
        colnr += 1
        if colnr % 100 == 0:
            print('converting column {0:d}...'.format(colnr))
        col = strip(col, '"')
        col = replace(col, '', '0')
        col = replace(col, 'NA', '0')
        col = replace(col, 'false', '0')
        col = replace(col, 'true', '1')
        try:
            irow = col.astype(int16)
        except ValueError as err:
            # 'skiprows' is assumed to be a module-level list in the original script
            skiprows.append(colnr - 1)
            failed.append(str(err).split(':', 1)[1])
        except OverflowError as err:
            print(str(err))
            skiprows.append(colnr - 1)
        # except OverflowError as err:
        # 	print(str(err))
        # 	print('will look for overflow error value...')
        # 	for v in col:
        # 		try:
        # 			v.astype(int)
        # 		except:
        # 			print 'overflow:', v
        else:
            conv.append(irow)
        del col
        gc.collect()  # free memory
    print('failed for (excluding overflows): "{0:s}"'.format(
        '", "'.join(failed)))
    print('{0:d} columns removed'.format(len(failed)))
    return array(conv)
Example #16
def create_graph(file_path, wet_test, y_axis_max, data_start_index):
    fig, ax = plt.subplots()

    file_name = os.path.basename(file_path).split('.')[0]
    data = np.genfromtxt(file_path,
                         delimiter=',',
                         dtype=str,
                         skip_header=data_start_index)
    # the csv files have double quotes for some reason - these need to be removed
    data = np_f.replace(data, '"', '')
    data = data.astype(float)  # convert remaining data to float
    if wet_test == 'True':
        sys_height = data[:, 1]
        ls_ohms = data[:, 4]

        ax.set_xlim([0, sys_height.max()])
        ax.set_ylim([0, float(y_axis_max)])
        ax.plot(sys_height, ls_ohms, linewidth=0.1)
        ax.set_xlabel('Height (mm)', fontsize=7)
        ax.set_ylabel('LS Resistance (ohms)', fontsize=7)

        start, end = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(0, end, 10))

    else:  #dry test
        ls_time = data[:, 0]
        ls_ohms = data[:, 4]

        ax.set_xlim([0, ls_time.max()])
        ax.set_ylim([0, float(y_axis_max)])
        ax.plot(ls_time, ls_ohms, linewidth=0.1)
        ax.set_xlabel('Time (sec)', fontsize=7)
        ax.set_ylabel('LS Resistance (ohms)', fontsize=7)

        start, end = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(0, end, 1))

    ax.tick_params(labelsize=5)
    ax.set_title(file_name, fontsize=7)
    ax.grid(linewidth=0.1)

    start, end = ax.get_ylim()
    ax.yaxis.set_ticks(np.arange(0, end, 50))

    #fig.savefig("test.png")
    #plt.show()
    make_pdf(file_path)
Example #17
def class_scoresmat2csv(matfile, bin_lid):
    """Convert a class score .mat file into a CSV representation"""
    try:
        import pandas as pd
    except ImportError:
        return '\n'.join(slow_class_scoresmat2csv(matfile, bin_lid))
    scores = loadmat(matfile, squeeze_me=True)

    prefix = bin_lid + '_'
    cols = scores['class2useTB'][:-1] # exclude last class: 'unclassified'
    df = pd.DataFrame(scores['TBscores'], columns=cols)
    p = scores['roinum'].astype(str)
    # zero-pad each roi number to six digits, then swap the first pad '0' for the bin prefix
    p = replace(rjust(p, 6, '0'), '0', prefix, 1)
    pid = pd.Series(p)
    df.insert(0, 'pid', pid)
    s = StringIO()
    df.to_csv(s,index=False, float_format='%f')
    csv_out = s.getvalue().replace('0.000000','0.0')
    return csv_out
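The pid line zero-pads each roi number to six digits and then lets replace() with count=1 swap only the first (left-most pad) zero for the bin prefix. A toy sketch of that step with an invented prefix:

import numpy as np
from numpy.core.defchararray import replace, rjust

prefix = 'D20140101T000000_IFCB101_'  # invented bin_lid + '_'
p = np.array(['7', '42', '1305'])
p = rjust(p, 6, '0')                  # ['000007' '000042' '001305']
# count=1 replaces only the first '0' in each element
pid = replace(p, '0', prefix, 1)
print(pid[0])  # D20140101T000000_IFCB101_00007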
Example #18
def find_rows_by_string(tab, names, colnames=['assoc']):
    """Find the rows in a table ``tab`` that match at least one of the
    strings in ``names``.  This method ignores whitespace and case
    when matching strings.

    Parameters
    ----------
    tab : `astropy.table.Table`
       Table that will be searched.

    names : list
       List of strings.

    colnames : list
       Names of the table columns that will be searched for matching strings.

    Returns
    -------
    mask : `~numpy.ndarray`
       Boolean mask for rows with matching strings.

    """
    mask = np.empty(len(tab), dtype=bool)
    mask.fill(False)
    names = [name.lower().replace(' ', '') for name in names]

    for colname in colnames:

        if colname not in tab.columns:
            continue

        col = tab[[colname]].copy()
        col[colname] = defchararray.replace(
            defchararray.lower(col[colname]).astype(str), ' ', '')
        for name in names:
            mask |= col[colname] == name
    return mask
Example #20
def extract(address):

    def gen_divider(d):
        A = []
        B = []
        a = d[:,0][0]
        j = 0
        for i in range(1, len(d[:, 0])):
            if a != d[:, 0][i]:
                B.append(i - 1)
                A.append(j)
                a = d[:, 0][i]
                j = i

        A.append(j)
        B.append(len(d[:,0])-1)
        V = []
        V.append(A)
        V.append(B)
        return V
    
    
    prt_container = []

    with open(address) as pdbfile:

        for line in pdbfile:

            if line[:6] == 'SEQRES':

                splitted_line = np.array([str(line[11]), str(line[19:22]),
                                          str(line[23:26]), str(line[27:30]),
                                          str(line[31:34]), str(line[35:38]),
                                          str(line[39:42]), str(line[43:46]),
                                          str(line[47:50]), str(line[51:54])])

                prt_container.append(splitted_line)

    d = np.array(prt_container)

    convert = {'A':'ALA','C':'CYS','D':"ASP",'E':'GLU',
               'F':'PHE','G':'GLY','H':'HIS','I':'ILE',
               'K':'LYS','L':'LEU','N':'ASN','M':'MET',
               'P':'PRO','Q':'GLN','R':'ARG','S':'SER',
               'T':'THR','V':'VAL','W':'TRP','Y':'TYR'}

    dummy = list(dict.fromkeys(convert))
    for i in dummy:  # convert three-letter residue codes to their one-letter codes
        d = np_f.replace(d, convert[i], i)

    z = []
    for j, i in zip(gen_divider(d)[0], gen_divider(d)[1]):
        x = np.concatenate(d[j:i + 1, 1:], axis=0)
        z.append(''.join(list(x)))

    return z
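The conversion loop works because replace() operates element-wise across the whole 2-D SEQRES array, so one pass per amino acid rewrites every matching cell. A reduced sketch with a hand-built array and a trimmed conversion table:

import numpy as np
import numpy.core.defchararray as np_f

d = np.array([['A', 'MET', 'GLY'],
              ['A', 'ALA', 'TRP']])
convert = {'M': 'MET', 'G': 'GLY', 'A': 'ALA', 'W': 'TRP'}
for one_letter in convert:
    d = np_f.replace(d, convert[one_letter], one_letter)
print(d)  # [['A' 'M' 'G']
          #  ['A' 'A' 'W']]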
Example #21

def roundup(x):
    return int(math.ceil(x / 10.0)) * 10


if __name__ == "__main__":

    file_path = r'C:\Data\sweep test data (single cycle).csv'
    data = np.genfromtxt(
        file_path,
        delimiter=',',
        dtype=str,
    )
    # the csv files have double quotes for some reason - these need to be removed
    data = np_f.replace(data, '"', '')
    data = data.astype(float)  # convert remaining data to float

    create_graph(
        data=data,
        data_path=file_path,
        title_1='CA2020-3549, MAPPS, Post 6.8 Mechanical Strength of Electrical Connector',
        title_2='2921-1, Wet Test',
        tol_path=r'C:\Users\gtetil\Documents\Projects\Reliability-Sweeper\Source\Files\Tolerances\MLS Tolerance (MS, dry).csv',
        tol_band_color=0,
        graph_type=1,
        height_min=-15,
        height_max=235,
Example #23
def create_graph(data, data_path, title_1, title_2, tol_path, tol_band_color,
                 graph_type, height_min, height_max, height_interval,
                 resistance_max, resistance_interval, time_max, time_interval,
                 graph_output_file, auto_open):

    fig = plt.figure()
    ax = fig.add_subplot(111)
    graph_type = Graph_Type(graph_type)  # convert graph type to an enum
    graph_output_file = Graph_Output_File(
        graph_output_file)  # convert graph output file to an enum
    tol_band_color = Color(tol_band_color)  # convert tol band color to an enum

    # LS data
    data = np.array(data)
    height_array = data[:, 1]
    min_height = np.amin(height_array)
    max_height = np.amax(height_array)
    mid_height = (max_height - min_height) / 2
    low_to_high_sweep = height_array[0] < mid_height
    if low_to_high_sweep:  # if sweep starts low and goes high, use this algorithm
        first_transition_index = int(np.argwhere(height_array > mid_height)[0])
        second_transition_index = int(
            np.argwhere(height_array[first_transition_index:] < mid_height -
                        10)[0]) + first_transition_index
        end_height_index = np.argmin(height_array[second_transition_index:(
            first_transition_index +
            second_transition_index)]) + second_transition_index
    else:  # if sweep starts high and goes low, use this algorithm
        first_transition_index = int(np.argwhere(height_array < mid_height)[0])
        second_transition_index = int(
            np.argwhere(height_array[first_transition_index:] > mid_height +
                        10)[0]) + first_transition_index
        end_height_index = np.argmax(height_array[second_transition_index:(
            first_transition_index +
            second_transition_index)]) + second_transition_index
    if graph_type in [Graph_Type.Both, Graph_Type.H_vs_R]:
        data = data[:end_height_index, :]
    time = data[:, 0]

    # separate data into e-to-f and f-to-e
    sys_height = data[:, 1]
    ls_ohms = data[:, 4]
    if low_to_high_sweep:
        min_indice = np.argmax(sys_height)
        empty_to_full = ls_ohms[:min_indice]
        full_to_empty = ls_ohms[min_indice:]
        empty_to_full_sys = sys_height[:min_indice]
        full_to_empty_sys = sys_height[min_indice:]
    else:
        min_indice = np.argmin(sys_height)
        empty_to_full = ls_ohms[min_indice:]
        full_to_empty = ls_ohms[:min_indice]
        empty_to_full_sys = sys_height[min_indice:]
        full_to_empty_sys = sys_height[:min_indice]

    lns = ax.plot()

    # height vs resistance
    if graph_type in [Graph_Type.Both, Graph_Type.H_vs_R]:
        ax.set_xlim(int(height_min), int(height_max))
        ax.set_ylim(0, float(resistance_max))
        e_to_f_plot = ax.plot(empty_to_full_sys,
                              empty_to_full,
                              linewidth=0.5,
                              label='R vs. H - fill',
                              color='blue')
        f_to_e_plot = ax.plot(full_to_empty_sys,
                              full_to_empty,
                              linewidth=0.5,
                              label='R vs. H - drain',
                              color='magenta')
        ax.set_xlabel('Height (mm)', fontsize=7)
        ax.set_ylabel(r'Resistance ($\Omega$)', fontsize=7)
        ax.xaxis.set_ticks(
            np.append(np.arange(height_min, height_max, height_interval),
                      height_max))
        ax.yaxis.set_ticks(
            np.append(np.arange(0, resistance_max, resistance_interval),
                      resistance_max))
        lns = e_to_f_plot + f_to_e_plot
        if graph_type == Graph_Type.H_vs_R:
            fig.suptitle(title_1, fontsize=10, fontweight='bold')
            ax.set_title(title_2, fontsize=10, y=1.03)

    # time vs resistance (with height vs resistance)
    if graph_type == Graph_Type.Both:
        ax2 = ax.twiny()
        ax2.set_xlim(0, time_max)
        r_vs_t = ax2.plot(time,
                          ls_ohms,
                          linewidth=0.5,
                          label='Resistance vs. Time',
                          color='orange')
        ax2.set_xlabel('Time (sec)', fontsize=7)
        ax2.xaxis.set_ticks(
            np.append(np.arange(0, time_max, time_interval), time_max))
        lns = lns + r_vs_t
        ax2.tick_params(labelsize=5)
        plt.subplots_adjust(top=0.835)
        fig.suptitle(title_1, fontsize=10, fontweight='bold')
        ax.set_title(title_2, fontsize=10,
                     y=1.095)  # this raises the title to fit the top x-axis

    # time vs resistance (only)
    if graph_type == Graph_Type.T_vs_R:
        ax.set_xlim(0, time_max)
        ax.set_ylim(0, resistance_max)
        r_vs_t = ax.plot(time,
                         ls_ohms,
                         linewidth=0.5,
                         label='Resistance vs. Time',
                         color='orange')
        ax.set_xlabel('Time (sec)', fontsize=7)
        ax.set_ylabel(r'Resistance ($\Omega$)', fontsize=7)
        ax.xaxis.set_ticks(
            np.append(np.arange(0, time_max, time_interval), time_max))
        ax.yaxis.set_ticks(
            np.append(np.arange(0, resistance_max, resistance_interval),
                      resistance_max))
        lns = r_vs_t
        fig.suptitle(title_1, fontsize=10, fontweight='bold')
        ax.set_title(title_2, fontsize=10, y=1.03)

    # tolerance bands
    if tol_path != '' and graph_type != Graph_Type.T_vs_R:
        tol_data = np.genfromtxt(tol_path,
                                 delimiter=',',
                                 dtype=str,
                                 skip_header=1)
        # the csv files have double quotes for some reason - these need to be removed
        tol_data = np_f.replace(tol_data, '"', '')
        tol_data = tol_data.astype(float)  # convert remaining data to float
        low_tol_plot = ax.plot(tol_data[:, 0],
                               tol_data[:, 1],
                               linewidth=0.5,
                               color=tol_band_color.name,
                               label='Tolerance',
                               linestyle=':')
        up_tol_plot = ax.plot(tol_data[:, 2],
                              tol_data[:, 3],
                              linewidth=0.5,
                              color=tol_band_color.name,
                              linestyle=':')
        lns = lns + low_tol_plot

    # create legend
    labs = [l.get_label() for l in lns]
    ax.legend(lns, labs, loc=3, fontsize='x-small')

    ax.tick_params(labelsize=5)
    ax.grid(linewidth=0.1)

    # output graph to file
    graph_path = data_path.replace('.csv', '.' + graph_output_file.name)
    if graph_output_file == Graph_Output_File.png:
        fig.savefig(graph_path, dpi=1000)
    elif graph_output_file == Graph_Output_File.pdf:
        make_pdf(graph_path)

    if auto_open:
        subprocess.Popen([graph_path], shell=True)

    #plt.show()

    # must close figure, or there will be a memory error when running batch_graph_creator.py
    plt.close(fig)
Example #24
# The Iris dataset stores the species column as strings, so it must be converted to numbers

# Extract the features and store them in X
X = np.genfromtxt('iris.csv',
                  delimiter=',',
                  dtype='float',
                  usecols=[0, 1, 2, 3],
                  skip_header=1)
print(X.shape)

# Extract the species column and store it in y
y = np.genfromtxt('iris.csv',
                  delimiter=',',
                  dtype='str',
                  usecols=4,
                  skip_header=1)

# Replace the strings with numbers
# np.unique() returns the unique string values in a numpy array
categories = np.unique(y)
print(categories)

for i in range(categories.size):
    # np_f.replace() substitutes string values element-wise
    y = np_f.replace(y, categories[i], str(i))

# Convert to float
y = y.astype('float')
print(y)
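For reference, the same loop can be written against the documented np.char namespace, which re-exports the defchararray routines. A compact sketch with a hand-built label array standing in for the iris file:

import numpy as np

y = np.array(['setosa', 'versicolor', 'setosa', 'virginica'])
categories = np.unique(y)  # sorted unique labels
for i, cat in enumerate(categories):
    y = np.char.replace(y, cat, str(i))
print(y.astype(float))  # [0. 1. 0. 2.]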
Example #25
def handle_missing_data(data):
    print("Replacing irrelevant values")
    data = np_f.replace(data, 'NA', '0')
    data = data.astype(float)
    return data
Example #26
    img = imread('../data/converted/' + fname + '.jpeg')
    hsv = color.rgb2hsv(img)
    hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2])
    img = color.hsv2rgb(hsv)
    imsave(folder + label + '/' + fname + '.png', img)


full_filenames = np.genfromtxt('../data/train_filenames.txt', dtype=str)

# Read the labels file
full_labels = np.genfromtxt('../data/trainLabels.csv',
                            skip_header=1,
                            dtype=str,
                            delimiter=',')
# Keep only labels of data that can be used in training
full_samples = replace(full_filenames, ".jpeg", "")
full_mask = np.isin(full_labels[:, 0], full_samples)
trainable_labels = np.copy(full_labels[full_mask, :])
test_labels = np.copy(full_labels[np.invert(full_mask), :])

# Downsample the zero grade, keeping only the first 5000
# Randomize order
np.random.seed(1234)
np.random.shuffle(trainable_labels)

# Remove a part for validation
n_validation = 2400
validation_labels = np.copy(trainable_labels[:n_validation, :])
trainable_labels = np.copy(trainable_labels[n_validation:, :])

# Arrange by a stable sort (mergesort)
Example #27
colors = ['Blue', 'Red']
for arm_color in colors:
    
    for obj_target in dz.observation_dict['objects'] + dz.observation_dict['Standard_stars']:
    
        #Get the arc
        arc_run         = map(float ,dz.observation_dict[obj_target + '_arc'])  #This must be changed to a df
        arc_idx         = (dz.reducDf.reduc_tag == 'arc_trim') & (dz.reducDf.RUN.isin(arc_run)) & (dz.reducDf.ISIARM == '{color} arm'.format(color = arm_color)) & (dz.reducDf.valid_file)  
        arc_filename    = dz.reducDf.loc[arc_idx, 'file_name'].values[0]
        arc_code        = arc_filename[0:arc_filename.rfind('.')]
    
        #Get the object
        index_object    = (dz.reducDf.reduc_tag == 'trim_image') & (dz.reducDf.frame_tag == obj_target) & (dz.reducDf.ISIARM == '{color} arm'.format(color = arm_color)) & (dz.target_validity_check())    
        Files_Folder    = dz.reducDf.loc[index_object, 'file_location'].values
        Files_Name      = dz.reducDf.loc[index_object, 'file_name'].values
        output_names    = np_f.replace(Files_Name.astype(str), '.fits', '_w.fits')
        
        for j in range(len(Files_Name)):  
            dz.task_attributes['run folder']    = Files_Folder[j]
            dz.task_attributes['color']         = arm_color
            dz.task_attributes['input']         = Files_Folder[j] + Files_Name[j]
            dz.task_attributes['output']        = Files_Folder[j] + output_names[j]
            dz.task_attributes['fitnames']      = arc_code
  
            #Moving the data base to the obj/standard folder
            if not os.path.exists(dz.task_attributes['run folder'] + 'database/'):
                os.makedirs(dz.task_attributes['run folder'] + 'database/') 
            
            input_arc_code  = '{in_folder}database/fc{arc_code}'.format(in_folder = dz.reducFolders['arcs'], arc_code = arc_code)
            output_arc_code = '{out_folder}database/fc{arc_code}'.format(out_folder = dz.task_attributes['run folder'], arc_code = arc_code)           
            copyfile(input_arc_code, output_arc_code)
Example #28
    astaltObj, fib4Obj, annObj
]

toronto.df = pd.read_excel(
    'C:/Users/Soren/Desktop/Thesis/Data Analysis/toronto_dataset.xlsx',
    parse_cols="A:BK")
#toronto.df = toronto.df.drop_duplicates(subset='MRN', keep='first')
#toronto.df = toronto.df.loc[(toronto.df['TotalMissingnessWithSodiumGGTPlatelets'] < 0) & (toronto.df['DecompensatedCirrhosis'] == 0)] # & (toronto.df['Platelets'] > 0)  &  )
toronto.df = toronto.df.loc[(toronto.df['DecompensatedCirrhosis'] == 0)  &  (toronto.df['Albumin'] > 0) & (toronto.df['ALP'] > 0) & (toronto.df['ALT'] > 0) \
                           & (toronto.df['AST'] > 0) & (toronto.df['Bilirubin'] > 0) & (toronto.df['Creatinine'] > 0) & (toronto.df['INR'] > 0)\
                             & (toronto.df['Platelets'] > 0) & (toronto.df['BMI'] > 0) ]
toronto.df = toronto.df.sample(frac=1).reset_index(drop=True)
toronto.X = toronto.df.iloc[:, 0:49].values
toronto.Y = toronto.df.iloc[:, 49].values
toronto.Y = nd.replace(
    nd.replace(nd.replace(toronto.Y.astype(str), 'F 4', '4'), 'F 1', '0'),
    'F 0', '0').astype(int)
toronto.MRNs = toronto.df.iloc[:, 51]
toronto.entryDates = toronto.df.iloc[:, 52]
toronto.split = 'groupKFold'  # KFold # groupKFold
dft = toronto.df

from sklearn.model_selection import GroupKFold
from sklearn.model_selection import KFold

kf = GroupKFold(n_splits=10)
normalKF = KFold(n_splits=10, shuffle=True, random_state=0)
kf.get_n_splits(toronto.X, toronto.Y, toronto.MRNs.astype(int))

svmObj.params = {
    'method': 'label',
Example #29
import numpy as np
import matplotlib.pyplot as plt
import numpy.core.defchararray as defCharArr

x = np.arange(-1.0, 1.01, 0.01)

testData = np.genfromtxt("test.txt", dtype='str')
testData = defCharArr.replace(testData, ',', '.')
testData = testData.astype(float)
print("Входные значения:\n", testData)
print("\n")

plt.figure(1)
plt.plot(x, testData, 'b.')

sigma = 0.5
eta = 0.1
centres = [-0.8, -0.6, -0.4, -0.2, 0.0, 0.2, 0.4, 0.6, 0.8]
weights = [0.2, 0.5, 0.8, 0.1, 0.88, 0.5, 0.3, 0.22, -0.2]


def net(testVal, centres):
    # 'centres' is a plain list, so convert it before the vectorised arithmetic
    return np.exp((-1 / (2 * sigma)) * ((np.asarray(centres) - testVal) ** 2))


numberOfEvals = range(2000)
testDataLength = len(testData)
numberOfValues = np.arange(0, testDataLength, 1)

for j in numberOfEvals:
    for i in numberOfValues:
Example #30
def get_generators(n_total,
                   batch_size,
                   image_shape=None,
                   type='array',
                   zeros_left=5000):
    '''
    Construct generators for training and validation data
    Zero grade images are downsampled
    :param n_total: number of total images to use (training plus validation)
    :param batch_size: batch size used in training
    :param image_shape: image size used in training (required when type='image')
    :param type: 'array' to read .npy arrays, 'image' to read .jpeg images
    :param zeros_left: how many images of grade zero should be left in the pool
                       use a negative value to keep all the zeros
    :return: train_gen: generator of training data
             test_gen: generator of validation data
    '''
    # Set the number of training samples
    n_train = int(np.ceil(n_total * 0.8))
    n_test = int(np.floor(n_total * 0.2))

    # Read filenames from a text file listing all the images
    full_filenames = np.genfromtxt('../data/train_filenames.txt', dtype=str)
    # Read the labels file
    full_labels = np.genfromtxt('../data/trainLabels.csv',
                                skip_header=1,
                                dtype=str,
                                delimiter=',')
    # Keep only labels of data that can be used in training
    full_samples = replace(full_filenames, ".jpeg", "")
    full_mask = np.isin(full_labels[:, 0], full_samples)
    trainable_labels = np.copy(full_labels[full_mask, :])

    # Downsample the zero grade, keeping only the first 5000
    # Randomize order
    np.random.seed(1234)
    np.random.shuffle(trainable_labels)
    # Arrange by a stable sort (mergesort)
    trainable_labels = np.copy(
        trainable_labels[trainable_labels[:, 1].argsort(kind='mergesort')])
    # Remove extra zeros
    if zeros_left > 0:
        _, counts = np.unique(trainable_labels[:, 1], return_counts=True)
        n_zeros = counts[0]
        downsampled_labels = np.copy(trainable_labels[(n_zeros -
                                                       zeros_left):, :])
    else:
        downsampled_labels = np.copy(trainable_labels)

    # Randomize and choose training data
    np.random.shuffle(downsampled_labels)
    train_labels = downsampled_labels[:n_train, :]
    #test_labels = downsampled_labels[n_train:(n_train + n_test)]
    # Exclude training samples from the original data and choose test data among them
    np.random.shuffle(trainable_labels)
    exclusion = np.isin(trainable_labels[:, 0],
                        train_labels[:, 0],
                        invert=True)
    valid_labels = np.copy(trainable_labels[exclusion, :])
    test_labels = np.copy(valid_labels[:n_test, :])

    # Print the counts of each class in test and train data
    _, train_counts = np.unique(train_labels[:, 1], return_counts=True)
    print("\nTrain distribution:")
    print(train_counts / np.sum(train_counts))
    _, test_counts = np.unique(test_labels[:, 1], return_counts=True)
    print("\nTest distribution:")
    print(test_counts / np.sum(test_counts))
    print("\n")

    if type == 'array':
        # Add .npy file ending
        train_filenames = add(train_labels[:, 0],
                              np.full(shape=n_train, fill_value='.npy'))
        test_filenames = add(test_labels[:, 0],
                             np.full(shape=n_test, fill_value='.npy'))
        # Add path of the data folder to the files
        train_filepaths = add(
            np.full(shape=train_filenames.shape, fill_value='../data/arrays/'),
            train_filenames)
        test_filepaths = add(
            np.full(shape=test_filenames.shape, fill_value='../data/arrays/'),
            test_filenames)

        # Create an instance of the image generator
        train_gen = ArrayGenerator(train_filepaths, train_labels[:, 1],
                                   batch_size)
        test_gen = ArrayGenerator(test_filepaths, test_labels[:, 1],
                                  batch_size)

    elif type == 'image':
        if image_shape is None:
            raise ValueError("image_shape is required when type == 'image'")
        # Add .jpeg file ending
        train_filenames = add(train_labels[:, 0],
                              np.full(shape=n_train, fill_value='.jpeg'))
        test_filenames = add(test_labels[:, 0],
                             np.full(shape=n_test, fill_value='.jpeg'))
        # Add path of the data folder to the files
        train_filepaths = add(
            np.full(shape=train_filenames.shape, fill_value='../data/train/'),
            train_filenames)
        test_filepaths = add(
            np.full(shape=test_filenames.shape, fill_value='../data/train/'),
            test_filenames)

        # Create an instance of the image generator
        train_gen = ImageGenerator(train_filepaths, train_labels[:, 1],
                                   batch_size, image_shape)
        test_gen = ImageGenerator(test_filepaths, test_labels[:, 1],
                                  batch_size, image_shape)

    return train_gen, test_gen
Example #31
def Normalized_Data():

    from dtw import dtw
    import numpy as np
    import matplotlib.pyplot as plt
    import csv
    import itertools
    import numpy.core.defchararray as np_f

    x_normalized = []
    y_normalized = []

    Data1 = []
    Data2 = []
    Data3 = []

    List = []
    List2 = []
    List3 = []

    DTW_Sat = []
    DTW_Week = []

    FID = []
    Longitude = []
    Latitude = []

    count1 = 0
    count2 = 0

    Regular_Sat_Location = r'C:\Users\patri\Desktop\Thesis_Final\Data_From_Extraction\M05_D07_HL_Edit_Regular_Sat.csv'
    Regular_Weekday_Location = r'C:\Users\patri\Desktop\Thesis_Final\Data_From_Extraction\M05_D25_HL_Edit_Regular_Weekday.csv'
    Memorial_Day_Location = r'C:\Users\patri\Desktop\Thesis_Final\Data_From_Extraction\M05_D28_HL_Edit_Memorial_Day_Sat.csv'

    with open(Regular_Sat_Location) as file:
        reader = csv.reader(file, delimiter=',')

        for column in reader:

            Data1.append(column[2])

            FID.append(column[0])

            Longitude.append(column[5])

            Latitude.append(column[4])

    FID.remove(FID[0])
    Longitude.remove(Longitude[0])
    Latitude.remove(Latitude[0])
    Data1.remove(Data1[0])

    with open(Regular_Weekday_Location) as file:
        reader = csv.reader(file, delimiter=',')

        for column in reader:

            Data2.append(column[2])

    Data2.remove(Data2[0])

    with open(Memorial_Day_Location) as file:
        reader = csv.reader(file, delimiter=',')

        for column in reader:

            Data3.append(column[2])

    Data3.remove(Data3[0])

    for day in Data1:

        result = [i.strip() for i in day.split(',')]
        List.append(result)

    Reg_Sat_List = list(itertools.chain.from_iterable(List))

    for day2 in Data2:

        result2 = [i2.strip() for i2 in day2.split(',')]
        List2.append(result2)

    Reg_Weekday_List = list(itertools.chain.from_iterable(List2))

    for day3 in Data3:

        result3 = [i3.strip() for i3 in day3.split(',')]
        List3.append(result3)

    Memorial_Day_List = list(itertools.chain.from_iterable(List3))

    x_with_string = np.array(Reg_Sat_List).reshape(-1, 1)
    findx = np_f.replace(x_with_string, 'NA', '0')
    Reg_Sat_Input = np.array(findx, dtype=int).reshape(-1, 1)

    Reg_Sat_Input_split = np.split(Reg_Sat_Input, 24)

    y_with_string = np.array(Reg_Weekday_List).reshape(-1, 1)
    findy = np_f.replace(y_with_string, 'NA', '0')
    Reg_Weekday_Input = np.array(findy, dtype=int).reshape(-1, 1)

    Reg_Weekday_Input_split = np.split(Reg_Weekday_Input, 24)

    x_with_string = np.array(Memorial_Day_List).reshape(-1, 1)
    findx = np_f.replace(x_with_string, 'NA', '0')
    Memorial_Day_List_Input = np.array(findx, dtype=int).reshape(-1, 1)

    Memorial_Day_List_Input_split = np.split(Memorial_Day_List_Input, 24)

    for x_hour_frequency in Memorial_Day_List_Input_split:

        for y_hour_frequency in Reg_Sat_Input_split:

            max = np.amax(x_hour_frequency)
            x_normalized_weeks = np.true_divide(x_hour_frequency, max)

            max2 = np.amax(y_hour_frequency)
            y_normalized_weeks = np.true_divide(y_hour_frequency, max2)

            l2_norm = lambda x_normalized_weeks, y_normalized_weeks: (
                x_normalized_weeks - y_normalized_weeks)**2

            dist = dtw(x_normalized_weeks, y_normalized_weeks, dist=l2_norm)

            DTW_Sat.append(dist[0])

            count1 += 1

            count2 += 1

    for x_hour_frequency in Memorial_Day_List_Input_split:

        for y_hour_frequency in Reg_Weekday_Input_split:

            max = np.amax(x_hour_frequency)
            x_normalized_weeks = np.true_divide(x_hour_frequency, max)

            max2 = np.amax(y_hour_frequency)
            y_normalized_weeks = np.true_divide(y_hour_frequency, max2)

            l2_norm = lambda x_normalized_weeks, y_normalized_weeks: (
                x_normalized_weeks - y_normalized_weeks)**2

            dist = dtw(x_normalized_weeks, y_normalized_weeks, dist=l2_norm)

            DTW_Week.append(dist[0])

            count1 += 1

            count2 += 1
    """
    #l2_norm = lambda Test_List_x, Test_List_y: (Test_List_x - Test_List_y) ** 2

    #dist, cost_matrix, acc_cost_matrix, path = dtw(Test_List_x, Test_List_y, dist=l2_norm)

    #print(dist)

    #For the dynamic time warping distance, the smaller the distance, the more
    #similar they are. The larger the distance, the less similar they are.

    plt.imshow(acc_cost_matrix.T, origin='lower', cmap='gray', interpolation='nearest')
    plt.plot(path[0], path[1], 'w')
    plt.show()
    """

    Big_File_Sat = zip(FID, Latitude, Longitude, DTW_Sat)

    Big_File_Week = zip(FID, Latitude, Longitude, DTW_Week)

    Sat_CSV = r"C:\Users\patri\Desktop\Thesis_Final\Results\Sat_CSV.csv"

    Weekday_CSV = r"C:\Users\patri\Desktop\Thesis_Final\Results\Weekday_CSV.csv"

    with open(Sat_CSV, 'w') as file2:

        csv_writer = csv.writer(file2, delimiter=',')

        file2.write('FID,Latitude,Longitude,DTW_Distance' + '\n')

        for line in Big_File_Sat:

            file2.write(''.join(str(line[0])) + ',' + ''.join(str(line[1])) +
                        ',' + ''.join(str(line[2])) + ',' +
                        ''.join(str(line[3])) + ',' + '\n')

    with open(Weekday_CSV, 'w') as file3:

        csv_writer = csv.writer(file3, delimiter=',')

        file3.write('FID,Latitude,Longitude,DTW_Distance' + '\n')

        for line in Big_File_Week:

            file3.write(''.join(str(line[0])) + ',' + ''.join(str(line[1])) +
                        ',' + ''.join(str(line[2])) + ',' +
                        ''.join(str(line[3])) + ',' + '\n')
Example #32
def get_in_coding_rgn(input_dir, features):
    fimo_files = np.sort(np.array(glob.glob(os.path.join(input_dir, "fimo-outs/fimo-out-*"))))
    print('Number of fimo files = ', str(len(fimo_files)))

    #  Get coding regions from features file (e.g. Escherichia_coli_K12_features)
    f = open(features, 'r')
    skip = 1
    line = f.readline()
    while 'header' not in line:
        line = f.readline()
        skip += 1
    f.close()

    features = pd.read_table(features, skiprows=skip)
    features = features[features.type != 'SEQ_END']
    start_pos = npstr.replace(features.start_pos.values.astype(str), '<', '').astype(int)
    end_pos = npstr.replace(features.end_pos.values.astype(str), '>', '').astype(int)

    total_coding_fracs = {}
    for f in fimo_files:
        print("Processing '%s'..." % f)
        fimo = None
        try:
            fimo = pd.read_table(f, sep='\t')
            is_bad = np.zeros(fimo.shape[0], dtype=bool)
            for i in range(fimo.shape[0]):
                row = fimo.iloc[i]
                hits = np.sum((features.contig == row['sequence name']) &
                              (start_pos <= row.start) &
                              (end_pos >= row.start))
                if hits <= 0:
                    hits = np.sum((features.contig == row['sequence name']) &
                                  (start_pos <= row.stop) &
                                  (end_pos >= row.stop))
                if hits > 0:
                    is_bad[i] = True

            # write out fimo file with new column, now we save it to new subdirectory,
            # 'coding_fracs/'
            fimo['in_coding_rgn'] = is_bad
            ff = os.path.basename(f).replace('fimo-out-','').replace('.bz2','')
            coding_frac_f = os.path.join(input_dir,
                                         'coding_fracs/' + 'coding-fracs-' + ff + '.tsv.bz2')
            fimo.to_csv(coding_frac_f, sep='\t', index=False, compression="bz2")

            if fimo.shape[0] <= 0:
                continue
            # write out summary for each motif in the run
            grpd = fimo.groupby('#pattern name').mean()
            mean_is_bad = grpd['in_coding_rgn'].values
            mot_ind = grpd.index.values
            mean_is_bad[ mean_is_bad == True ] = 1.0
            mean_is_bad[ mean_is_bad == False ] = 0.0
            for i in range(len(mean_is_bad)):
                total_coding_fracs[ff + '_' + str(mot_ind[i])] = round(mean_is_bad[i], 4)
        except Exception:
            # This is catching a pandas.errors.EmptyDataError
            # However, older versions of pandas don't have this class
            print('SKIPPING -- cannot read fimo output')

    coding_fracs = pd.DataFrame({'motif': [a for a in sorted(total_coding_fracs.keys())],
                                 'coding_frac': [total_coding_fracs[a]
                                                 for a in sorted(total_coding_fracs.keys())]})
    coding_fracs.to_csv(os.path.join(input_dir, "coding_fracs.tsv.bz2"),
                         sep='\t', index=False, compression='bz2')
    return coding_fracs
Example #33
def create_graph(file_path, tol_path, main_side, wet_test, x_axis_min,
                 x_axis_max, x2_axis_max, y_axis_max, data_start_index,
                 ls_series, title, time_v_res):
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # LS data
    file_name = os.path.basename(file_path).split('.')[0]
    data = np.genfromtxt(file_path,
                         delimiter=',',
                         dtype=str,
                         skip_header=data_start_index)
    # the csv files have double quotes for some reason - these need to be removed
    data = np_f.replace(data, '"', '')
    data = data.astype(float)  # convert remaining data to float
    height_array = data[:, 1]
    min_height = np.amin(height_array)
    max_height = np.amax(height_array)
    mid_height = (max_height - min_height) / 2
    low_to_high_sweep = height_array[0] < mid_height
    if low_to_high_sweep:  # if sweep starts low and goes high, use this algorithm
        first_transition_index = int(np.argwhere(height_array > mid_height)[0])
        second_transition_index = int(
            np.argwhere(height_array[first_transition_index:] < mid_height -
                        10)[0]) + first_transition_index
        end_height_index = np.argmin(height_array[second_transition_index:(
            first_transition_index +
            second_transition_index)]) + second_transition_index
    else:  # if sweep starts high and goes low, use this algorithm
        first_transition_index = int(np.argwhere(height_array < mid_height)[0])
        second_transition_index = int(
            np.argwhere(height_array[first_transition_index:] > mid_height +
                        10)[0]) + first_transition_index
        end_height_index = np.argmax(height_array[second_transition_index:(
            first_transition_index +
            second_transition_index)]) + second_transition_index
    data = data[:end_height_index, :]

    show_tolerance = ls_series in ['MLS', 'E7x', 'F1x', 'F2x']

    # Tolerance data
    if show_tolerance:
        if ls_series == 'MLS' and main_side == 'True' and wet_test == 'True':
            tol_file_name = 'MLS Tolerance (MS, wet).csv'
        if ls_series == 'MLS' and main_side == 'True' and wet_test == 'False':
            tol_file_name = 'MLS Tolerance (MS, dry).csv'
        if ls_series == 'MLS' and main_side == 'False' and wet_test == 'True':
            tol_file_name = 'MLS Tolerance (SS, wet).csv'
        if ls_series == 'MLS' and main_side == 'False' and wet_test == 'False':
            tol_file_name = 'MLS Tolerance (SS, dry).csv'

        if ls_series == 'E7x' and main_side == 'True' and wet_test == 'True':
            tol_file_name = 'E7x Tolerance (MS, wet).csv'
        if ls_series == 'F1x' and main_side == 'True' and wet_test == 'True':
            tol_file_name = 'F1x Tolerance (MS, wet).csv'
        if ls_series == 'F2x' and main_side == 'True' and wet_test == 'True':
            tol_file_name = 'F2x Tolerance (MS, wet).csv'
        if ls_series == 'E7x' and main_side == 'False' and wet_test == 'True':
            tol_file_name = 'E7x Tolerance (SS, wet).csv'
        if ls_series == 'F1x' and main_side == 'False' and wet_test == 'True':
            tol_file_name = 'F1x Tolerance (SS, wet).csv'

        if ls_series == 'E7x' and main_side == 'True' and wet_test == 'False':
            tol_file_name = 'E7x Tolerance (MS, dry).csv'
        if ls_series == 'F1x' and main_side == 'True' and wet_test == 'False':
            tol_file_name = 'F1x Tolerance (MS, dry).csv'
        if ls_series == 'F2x' and main_side == 'True' and wet_test == 'False':
            tol_file_name = 'F2x Tolerance (MS, dry).csv'
        if ls_series == 'E7x' and main_side == 'False' and wet_test == 'False':
            tol_file_name = 'E7x Tolerance (SS, dry).csv'
        if ls_series == 'F1x' and main_side == 'False' and wet_test == 'False':
            tol_file_name = 'F1x Tolerance (SS, dry).csv'

        tol_color = 'black' if main_side == 'True' else 'red'

        tol_file_path = os.path.join(tol_path, tol_file_name)
        tol_data = np.genfromtxt(tol_file_path,
                                 delimiter=',',
                                 dtype=str,
                                 skip_header=1)
        # the csv files have double quotes for some reason - these need to be removed
        tol_data = np_f.replace(tol_data, '"', '')
        tol_data = tol_data.astype(float)  # convert remaining data to float

    if wet_test == 'True':
        time_axis_inc = 10
    else:
        time_axis_inc = 1
    time = data[:, 0]
    sys_height = data[:, 1]
    ls_ohms = data[:, 4]
    if low_to_high_sweep:
        min_indice = np.argmax(sys_height)
        empty_to_full = ls_ohms[:min_indice]
        full_to_empty = ls_ohms[min_indice:]
        empty_to_full_sys = sys_height[:min_indice]
        full_to_empty_sys = sys_height[min_indice:]
    else:
        min_indice = np.argmin(sys_height)
        empty_to_full = ls_ohms[min_indice:]
        full_to_empty = ls_ohms[:min_indice]
        empty_to_full_sys = sys_height[min_indice:]
        full_to_empty_sys = sys_height[:min_indice]

    plot_time_v_res = ls_series == 'MLS' or time_v_res == 'True'

    # height vs resistance
    ax.set_xlim(int(x_axis_min), int(x_axis_max))
    ax.set_ylim(0, float(y_axis_max))
    e_to_f_plot = ax.plot(empty_to_full_sys,
                          empty_to_full,
                          linewidth=0.5,
                          label='R vs. H - fill',
                          color='blue')
    f_to_e_plot = ax.plot(full_to_empty_sys,
                          full_to_empty,
                          linewidth=0.5,
                          label='R vs. H - drain',
                          color='magenta')
    ax.set_xlabel('Height / mm', fontsize=7)
    ax.set_ylabel(r'Resistance / $\Omega$', fontsize=7)
    start, end = ax.get_xlim()
    if plot_time_v_res:
        ax.xaxis.set_ticks(np.arange(int(start), end + 10, 10))
    else:
        increment = roundup((end - start) / 25)
        ax.xaxis.set_ticks(np.arange(int(start), end, 50))
    lns = e_to_f_plot + f_to_e_plot

    # time vs resistance
    if plot_time_v_res:
        ax2 = ax.twiny()
        ax2.set_xlim(0, int(x2_axis_max))
        #ax2.set_ylim([0, float(y_axis_max)])
        r_vs_t = ax2.plot(time,
                          ls_ohms,
                          linewidth=0.5,
                          label='Resistance vs. Time',
                          color='orange')
        ax2.set_xlabel('Time / s', fontsize=7)
        start, end = ax2.get_xlim()
        ax2.xaxis.set_ticks(np.arange(start, end, time_axis_inc))
        lns = lns + r_vs_t
        ax2.tick_params(labelsize=5)

    # tolerance bands
    if show_tolerance:
        low_tol_plot = ax.plot(tol_data[:, 0],
                               tol_data[:, 1],
                               linewidth=0.5,
                               color=tol_color,
                               label='Tolerance',
                               linestyle=':')
        up_tol_plot = ax.plot(tol_data[:, 2],
                              tol_data[:, 3],
                              linewidth=0.5,
                              color=tol_color,
                              linestyle=':')
        lns = lns + low_tol_plot

    # create legend
    labs = [l.get_label() for l in lns]
    ax.legend(lns, labs, loc=3, fontsize='x-small')

    ax.tick_params(labelsize=5)
    ax.grid(linewidth=0.1)

    if plot_time_v_res:
        ax.set_title(title, fontsize=10,
                     y=1.08)  # this raises the title to fit the top x-axis
    else:
        ax.set_title(title, fontsize=10)

    start, end = ax.get_ylim()
    ax.yaxis.set_ticks(np.arange(0, end, 50))

    if plot_time_v_res:
        fig.savefig(file_path.replace('.csv', '.png'), dpi=1000)
    else:
        make_pdf(file_path)

    #plt.show()

    # must close figure, or there will be a memory error when running batch_graph_creator.py
    plt.close(fig)
Example #34
def run():
    parser = argparse.ArgumentParser(description="Examples: \n" +\
     "calc_spectra data/vega.pkl data/vega/ -i 0.000 1.5707963267948966 150; " +\
     "calc_spectra data/vega.pkl data/vega/ -i 0.088418; " +\
     "calc_spectra data/altair.pkl data/altair/ -i 0.8840; " +\
     "calc_spectra data/achernar.pkl data/achernar/ -i 1.0577")
    parser.add_argument("pkl_sfile", help="the pickled star file")
    parser.add_argument("output", help="the output directory")
    parser.add_argument(
        '-i',
        type=float,
        nargs='+',
        help='either a single inclination in radians ' +
        'or equally spaced values specified by minimum, maximum and number',
        required=True)
    parser.add_argument("-m", help="longitudinal integration method: 0=cubic(default), 1=trapezoidal", type=int, \
      default=0)
    args = parser.parse_args()

    ## inputs
    pkl_sfile = args.pkl_sfile  # pickled star file
    output = args.output  # output location

    # integration method
    if args.m == 0:
        m = 'cubic'
    elif args.m == 1:
        m = 'trapezoid'
    else:
        sys.exit(
            "Longitudinal integration method should be either 0 (cubic) or 1 (trapezoidal)."
        )

    # inclinations
    i = args.i
    li = len(i)
    if li not in [1, 3]:
        sys.exit("Please specify either a single inclination in radians (one number) " +\
         "or a range specified by minimum, maximum and step (three numbers).")
    elif li == 1:
        inclinations = np.array(i)
        # decimal precision of inclination for printout
        prec = 6
    elif li == 3:
        mi, ma, num = i
        inclinations = np.linspace(mi, ma, num=int(num))
        # decimal precision of inclination for printout
        prec = int(np.ceil(-np.log10((ma - mi) / num)))
    leni = len(inclinations)

    # unpickle the star
    with open(pkl_sfile, 'rb') as f:
        st = pickle.load(f)
    # get the wavelengths at which we see light from this star
    wl = st.wavelengths

    ## write the spectra of the star in text format
    # create the directory if it doesn't exist
    if not os.path.exists(output):
        os.mkdir(output)
    # filenames
    if not output.endswith('/'):
        output += '/'
    filename = os.path.splitext(os.path.basename(pkl_sfile))[0]
    inc_str = np.array([("%." + str(prec) + "f") % x
                        for x in np.round(inclinations, decimals=prec)])
    ofiles = ch.add(output + filename, inc_str)
    ofiles = ch.replace(ofiles, '.', '_')
    ofiles = ch.add(ofiles, '.txt')

    for i, ofile in np.ndenumerate(ofiles):
        # message
        if i[0] % 10 == 0:
            print(
                str(i[0]) + " out of " + str(leni) +
                " inclinations calculated.")
            sys.stdout.flush()
        # current inclination
        inc = inclinations[i]
        # calculate the spectrum or the magnitudes
        light = st.integrate(inc, method=m)

        # create this file if it doesn't exist, open it for writing
        f = open(ofile, 'w+')
        # write the header
        f.write('# luminosity: ' + str(st.luminosity) + '\n')
        f.write('# omega: ' + str(st.surface.omega) + '\n')
        f.write('# inclination(rad): ' + str(inclinations[i]) + '\n')
        f.write('# mass: ' + str(st.mass) + '\n')
        f.write('# Req: ' + str(st.Req) + '\n')
        f.write('# distance: ' + format(st.distance, '.2e') + ' cm\n')
        f.write('# A_V: ' + format(*(st.a_v), '.2f') + '\n')
        f.write('# number of upper half z values: ' + str(st.map.nz) + '\n')
        # write the spectrum to the file
        f.write('\n')
        if st.bands is None:  # spectrum mode
            f.write('# wavelength(nm)\tflux(ergs/s/Hz/ster)\n')
            for j, w in np.ndenumerate(wl):
                f.write(str(w))
                f.write('\t %.5E' % light[j])
                f.write('\n')
        else:  # photometry mode
            f.write('# filter\twavelength(nm)\tmagnitude\n')
            for j, w in enumerate(wl):
                f.write(st.bands[j])
                f.write('\t %.6g' % w)
                f.write('\t %.8f' % light[j])
                f.write('\n')
        f.close()
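The output-name construction above chains defchararray's add() (element-wise concatenation) with replace(), swapping the decimal points for underscores so the inclinations can live in filenames. A reduced sketch of just that step, assuming ch aliases numpy.core.defchararray and using an invented path and values:

import numpy as np
import numpy.core.defchararray as ch

inc_str = np.array(['0.088418', '1.570796'])
ofiles = ch.add('data/vega/vega', inc_str)  # element-wise string concatenation
ofiles = ch.replace(ofiles, '.', '_')       # dots would collide with the extension
ofiles = ch.add(ofiles, '.txt')
print(ofiles)
# ['data/vega/vega0_088418.txt' 'data/vega/vega1_570796.txt']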
Example #35
rank_attr

top50

#######################################################
#### LGBM
#######################################################


##Test 1: Normal

dataset_y = accidents_train['Accident_Severity']
dataset_x = accidents_train.drop(['Accident_Severity'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(dataset_x, dataset_y, test_size=0.20, random_state=42)
col = np.array(X_train.columns, dtype=str)
col = np_f.replace(col, ':', '=')

X_train.columns = col
X_test.columns = col

#1) LGBM normal
print('LGBM - Normal')

model = lgb.LGBMClassifier(random_state=42)
model.fit(X_train, y_train, eval_metric='multi_logloss', eval_set=[(X_test, y_test)], early_stopping_rounds=50)

y_probas = model.predict_proba(X_test)
roc = roc_auc_score(np.where(y_test == 1, 1, 0), y_probas[:, 0])
skplt.metrics.plot_roc_curve(y_test, y_probas)
plt.show()
saveResults[2][0] = roc