Code example #1
def main(ofile, sep=';'):
    # Build the image database
    db = commons.buildDatabase()

    # Load the image features; reuse the cached CSV when it exists
    print('[LOG] Extracting features')
    table = []
    if os.path.exists(CHARACTERISTICS_CSV):
        table = commons.loadCaracteristics(CHARACTERISTICS_CSV)
    else:
        table = extraction.extract(db)
        commons.saveCaracteristics(table, CHARACTERISTICS_CSV)

    # Define the vector of image labels (20 benign, 20 malignant)
    labelImage = np.array(['benigno'] * 20 + ['maligno'] * 20)

    # Open the classification CSV file for writing
    with open(ofile, "w") as f:
        # Write the file header
        header = ['id', 'channel', 'test_size'] + [m.NAME for m in METHODS]
        f.write(sep.join(header) + '\n')

        # For each channel
        idExperiment = 1
        for ch in ['red', 'green', 'blue', 'gray']:
            print(
                f'[LOG] Applying the classification methods to channel {ch}'
            )

            # For each test-set size
            for test_size in np.arange(0.1, 1.0, 0.1):
                # Reset each method's accuracy list so the mean can be computed
                for method in METHODS:
                    method.accur = []

                # Run consecutive experiments in order to average the accuracy
                for experiment in range(TESTS):
                    while True:
                        # Split into train/test sets with the chosen test-set size
                        x_train, x_test, y_train, y_test = train_test_split(
                            table[ch],
                            labelImage,
                            test_size=test_size,
                            random_state=None)

                        # Resample until the training set contains both classes (benigno/maligno)
                        if len(np.unique(y_train)) > 1:
                            break

                    # Classify with each method on this split and record its accuracy
                    for method in METHODS:
                        method.accur.append(
                            method.classify(x_train, x_test, y_train, y_test))

                # Write each method's mean accuracy for this channel/test size
                f.write(
                    sep.join([f'{idExperiment}', ch, f'{test_size:.02f}'] +
                             [f'{np.mean(m.accur):.03f}'
                              for m in METHODS]) + '\n')
                idExperiment += 1
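Example #1 assumes that each entry in METHODS exposes a NAME attribute, an accur list, and a classify(x_train, x_test, y_train, y_test) method that returns an accuracy. A minimal sketch of such a wrapper, assuming scikit-learn; the actual METHODS implementation is not shown on this page, so the names below are illustrative:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

class KNNMethod:
    NAME = 'knn'  # hypothetical method name used in the CSV header

    def __init__(self):
        self.accur = []  # reset by the caller before each test_size

    def classify(self, x_train, x_test, y_train, y_test):
        # Fit on the training split and score on the held-out split
        clf = KNeighborsClassifier(n_neighbors=3)
        clf.fit(x_train, y_train)
        return accuracy_score(y_test, clf.predict(x_test))

METHODS = [KNNMethod()]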
Code example #2
    def train(self):
        """Train on every file in self.training_dir. The directory path may
        need to be a raw string (r'...') on Windows."""
        print("training now")
        print(self.training_dir)
        # List the files in the training directory
        training_files_list = [
            f for f in listdir(self.training_dir) if isfile(join(self.training_dir, f))
        ]
        print(training_files_list)
        for training_file in training_files_list:
            file_path = os.path.join(self.training_dir, training_file)  # build the full path
            print(file_path + "\n")
            table_name = extraction.extract(file_path)
            t = getTable(table_name, user, password, host, database)  # returns a table object
            column_names = []
            for column in t.columns:
                # Strip the digits 0-9 from the column name
                for digit in '0123456789':
                    column.colName = column.colName.replace(digit, '')
                print(column.colName)
                # Keep only the columns whose types we want to train on
                if column.colName in self.types:
                    column_names.append(column.colName)
            t.build_column_index()
            for col in column_names:
                index = t.column_index[col]
                column_obj = t.columns[index]  # the column object itself
                self.train_on_column(column_obj)
        print("done training")
        self.save(self.trained_dictionary, path + "new_trained_dictionary.dat")
Code example #3
    def Calibrate(self, footprint, head):
        if self.monochro_flat_list:
            logging.info("Building a synthetic flat")
            synthetic_flat = self.BuildSyntheticFlat()
        else:
            sys.exit('Should propose a pixel calibration other than '
                     'from monochromatic flatfields')

        fig = pl.figure()
        footprint = footprint / synthetic_flat  # now in units of photon counts
        #pl.imshow(footprint, cmap='hot', aspect='auto')
        #pl.show()
        X, calib_flat = self.project(synthetic_flat)
        '''Extract after calibration'''
        get = ex.extract(self.spectrum, footprint)
        X, calib_profile = get.ExtractSpectrum()
        '''Dump spectrum'''
        logging.info(
            "Writing calibrated spectrum using synthetic flat into calibspectrum.list"
        )
        self.spectrum.DumpSpectrum(
            head, ('pixel', 'w', 'rawflux', 'califlux'),
            (self.spectrum.pixel, self.spectrum.wavelength,
             np.array(self.spectrum.aper_flux), np.array(calib_profile)),
            str("calibspectrum" + self.spectrum.order + ".list"))

        if self.spectrum.plot:
            self.ControlPlot1(X, calib_profile, calib_flat,
                              self.spectrum.aper_flux)
Code example #4
    def train(self, training_dir):
        '''Train on every file in training_dir. The directory path may need
        to be a raw string (r'...') on Windows.'''

        # List the files in the training directory
        training_files_list = [
            f for f in listdir(training_dir) if isfile(join(training_dir, f))
        ]
        for training_file in training_files_list:

            file_path = os.path.join(training_dir,
                                     training_file)  # build the full path
            print(file_path + "\n")
            table_name = extraction.extract(file_path)
            t = getTable(table_name, user, password, host,
                         database)  # returns a table object
            column_names = []

            for column in t.columns:
                # Strip the digits 0-9 from the column name
                for digit in '0123456789':
                    column.colName = column.colName.replace(digit, '')

                # Keep only the columns whose types we want to train on
                if column.colName in self.types:
                    column_names.append(column.colName)

            t.build_column_index()

            for col in column_names:
                index = t.column_index[col]
                column_obj = t.columns[index]  # the column object itself
                self.train_on_column(column_obj)

        self.save(self.trained_dictionary, path + "trained_dictionary.dat")
Code example #5
	def component_extract():
		# Read the component box templates
		temp_comp = "sample_images/components"
		pick_comp = extract(image, temp_comp)
Code example #6
File: __init__.py Project: denigma/denigma
def download(url, filename=None, folder=False):
    """Downloads a file and tries to extract it."""
    if not filename:
        filename = url.split('/')[-1]
        url_file = url
    else:
        url_file = url + filename
    print("Downloading file %s from url: %s" % (filename, url))
    if folder:
        response = urllib.urlretrieve(url_file, os.path.join(folder, filename))
    else:
        response = urllib.urlretrieve(url_file, filename)
    print("Download completed for: %s" % filename)
    try:
        if folder:
            extract(filename, folder=folder)
        else:
            extract(filename)
    except Exception:  # extraction is best-effort
        print("Did not extract %s" % filename)
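This helper targets Python 2, where urllib.urlretrieve exists; in Python 3 the function moved to urllib.request. A minimal port, assuming the same extract() helper is importable:

import os
import urllib.request

def download(url, filename=None, folder=False):
    """Downloads a file and tries to extract it (Python 3 sketch)."""
    if not filename:
        filename = url.split('/')[-1]
        url_file = url
    else:
        url_file = url + filename
    target = os.path.join(folder, filename) if folder else filename
    print("Downloading file %s from url: %s" % (filename, url))
    urllib.request.urlretrieve(url_file, target)
    print("Download completed for: %s" % filename)
    try:
        if folder:
            extract(filename, folder=folder)
        else:
            extract(filename)
    except Exception:
        print("Did not extract %s" % filename)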
Code example #7
File: process.py Project: Tubbz-alt/treasure
    def __init__(self):
        notifications.__init__(self)

        # Variables
        self.__found = []  # Found items.
        self.__unique = {}  # Unique items.

        # Instantiated helper objects
        self.__extract = extract()
        self.__notify = notifications()
Code example #9
	def arrow_extract():
		# Read the self-arrow template
		slf = "sample_images/self_arrow"
		pick_self = extract(image, slf)

		# Read the right-to-left arrow template
		rght_lft = "sample_images/right_arrow"
		pick_rght_lft = extract(image, rght_lft)

		# Read the left-to-right arrow template
		lft_rght = "sample_images/left_arrow"
		pick = extract(image, lft_rght)
		# Keep only the matches that do not fall inside a self-arrow box
		pick_lft_rght = []
		for i in range(len(pick)):
			f = 1
			for j in range(len(pick_self)):
				if pick[i][0] >= pick_self[j][0] and pick[i][2] <= pick_self[j][2]:
					f = 0
					break
			if f == 1:
				pick_lft_rght.append(pick[i])
Code example #10
	def execute1(self, filename):
		filename = filename.replace("\n", "")
		filename = filename.replace(" ", "_")
		table_name = extraction.extract(filename)
		#self.t = getTable(table_name)
		# call Keith and Pawel's script
		c = column_typer(self.t)
		cl = c.build_report()
		print(self.t.columns[0].tentClass)
		print("below")
		dirToSave = path + "output"
		fn = table_name + ".txt"
		pathToSave = os.path.join(dirToSave, fn)

		with open(pathToSave, "w") as text_file:
			text_file.write(cl)
Code example #11
def identify(outputPath, load=False, multipleFile=False):
    corpus = Corpus(multipleFile=multipleFile, load=load)
    print('Corpus loaded')
    if load:
        svm = SVMClf(None, None, load=load)
    else:
        oracle.parse(corpus)
        print('parse finished')
        labels, data = extract(corpus)
        print('Training data was prepared')
        svm = SVMClf(labels, data, load=load, save=multipleFile)
    print('Evaluation started')
    parse(corpus, svm.classifier, svm.verctorizer)  # 'verctorizer' is the attribute's spelling in SVMClf
    # with open(outputPath, 'w') as f:
    #    f.write(str(corpus))
    # evaluate(corpus)
    print('finished')
Code example #12
def execute(filename):
    filename = filename.replace("\n", "")
    filename = filename.replace(" ", "_")
    table_name = extraction.extract(filename)
    t = getTable(table_name)

    '''numClass = numeric_classifier();
    result = "";
    for col in t.columns:
        diction = {}
        for item in col.rows:
            res = numClass.classify(item);
            if res in diction:
                diction[res] += 1;
            else:
                diction[res] = 1;
        result += col.colName + ': ' + max(diction.iteritems(), key=operator.itemgetter(1))[0];
        result += '\n'
        '''

    # call Keith and Pawel's script
    c = column_typer(t)
    cl = c.build_report()

    # collect statistics
    results = c.table_typify(t)
    ct.tally_and_save(results)

    #with open('output/' + table_name + '.txt', 'w') as outfile:
    #    json.dump(cl, outfile)

    # error detection commented out to avoid getting an email from the server
    # detective = error_detector(t)
    # possible_errors_dictionary = detective.find_table_errors(errors_to_check_list)

    dirToSave = path + "output"
    fn = table_name + ".txt"
    pathToSave = os.path.join(dirToSave, fn)
    print(pathToSave)
    print('this')
    with open(pathToSave, "w") as text_file:
        text_file.write(cl)
Code example #13
    def assert_extraction_as_expected(self, c1_form):
        transformed_path = 'test/resources/forms/transformed/'
        for root, dirs, file_names in walk(transformed_path):
            found_files = fnmatch.filter(file_names, '*' + c1_form + '.jpg')
        self.assertEqual(1, len(found_files), msg="only one file should match pattern")
        output_path = tempfile.gettempdir()
        probability_map = extraction.extract(found_files[0], transformed_path, output_path, settings.STATIC_DIR)
        expected_json_file_path = 'test/resources/probabilities/' + c1_form + '.json'
        with io.open(expected_json_file_path, 'r') as expected_json_file:
            expected = expected_json_file.read()
            self.maxDiff = None
            expected_json = json.loads(expected)
            actual_json = json.loads(probability_map)
        # Overwrite the expected resource when regeneration is requested
        if (expected_json != actual_json) and self.overwrite_resources:
            with io.open(expected_json_file_path, 'w', encoding='utf-8') as expected_json_file:
                expected_json_file.write(json.dumps(actual_json, ensure_ascii=False, indent=4,
                                                    separators=(',', ': '), sort_keys=True))
        self.assertEqual(expected_json, actual_json)
Code example #15
File: ftp.py Project: AaronMBrown/denigma
    def retrieve(self, files):
        """Retrieves the specified file(s)."""
        if isinstance(files, str):
            files = [files]
        for name in files:
            for f in self:
                if f.startswith(name):
                    print(self.url + '/' + f, f)
                    response = urllib.urlretrieve(self.url + '/' + f,
                                                  os.path.join(self.path, f))
                    print("+ {0} retrieved".format(f))
                    if f.endswith('.gz'):
                        # unzip; note this rebinds the 'files' argument
                        files = extract(f, folder=self.path)
                        if isinstance(files, list):
                            self.extracted.extend(files)
                        else:
                            self.extracted.append(files)
                        extracted = f[:-3]  # drop the '.gz' suffix
                        print("+ {0} extracted".format(extracted))
                        print("- {0} deleted".format(f))
                        f = extracted
                    self.downloads.append(f)
Code example #16
File: main.py Project: the-tempest/data_cleansing
def execute(filename):
    filename = filename.replace("\n", "")
    filename = filename.replace(" ", "_")
    table_name = extraction.extract(filename)
    t = getTable(table_name)

    # call the column classifier script
    x = main_classifier()
    x.new_table(t)
    cl = x.report

    # collect statistics
    ct.tally_and_save(x.results)

    dirToSave = path + "output"
    fn = table_name + ".txt"
    pathToSave = os.path.join(dirToSave, fn)
    print(pathToSave)
    print('this')
    with open(pathToSave, "w") as text_file:
        text_file.write(cl)
Code example #18
File: identification.py Project: hazemalsaied/LPP
def identify():
    corpus = Corpus(multipleFile=False)
    lexicon = {}
    for sent in corpus.trainingSents:
        for mwe in sent.vMWEs:
            if len(mwe.tokens) > 1:
                lexicon[getTokenText(mwe.tokens)] = True
    res = ''
    for k in sorted(lexicon.keys()):
        res += k + '\n'
    with open(
            '/Users/halsaied/PycharmProjects/LePetitPrince/Corpora/LPP/mweLEX.txt',
            'w') as F:
        F.write(res)
    return
    # Everything below is unreachable while the early return above is in place
    oracle.parse(corpus)
    labels, data = extract(corpus)
    svm = SVMClf(labels, data)
    parse(corpus, svm.classifier, svm.verctorizer)
    evaluate(corpus)
    print("confidence calculation")
    calculateConfidence(corpus)
Code example #20
from extraction import extract
from processing import tree_rooted_at_uri
from render import decision_tree_to_dot

from models import DecisionNode

# This includes inaccessible nodes
full_decision_tree = extract()

# Tree accessible from root node /contest/1
decision_tree = tree_rooted_at_uri("/contest/1", full_decision_tree)

with open("graphs/advice_full.dot", "w") as f:
    f.write(decision_tree_to_dot(decision_tree))

# /contest/1 has three main subgraphs:
# /contest/14: I am having issues getting my security deposit back
# /contest/11: I need help getting something repaired
# /contest/29: I need help with an eviction

decision_tree_sec_dep = tree_rooted_at_uri("/contest/14", full_decision_tree)
with open("graphs/advice_sec_dep.dot", "w") as f:
    f.write(decision_tree_to_dot(decision_tree_sec_dep))

decision_tree_repairs = tree_rooted_at_uri("/contest/11", full_decision_tree)  # repairs subtree, not /contest/14
with open("graphs/advice_repairs.dot", "w") as f:
    f.write(decision_tree_to_dot(decision_tree_repairs))

decision_tree_evictions = tree_rooted_at_uri("/contest/29", full_decision_tree)
with open("graphs/advice_evictions.dot", "w") as f:
    f.write(decision_tree_to_dot(decision_tree_evictions))
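The emitted .dot files can then be rendered with Graphviz, assuming it is installed, e.g. dot -Tpng graphs/advice_full.dot -o advice_full.png.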
Code example #21
File: process.py Project: CyberScions/Loot
    def __init__(self):
        self.__found = []
        self.__unique = {}
        self.__extract = extract()
        self.__export = Exportation()
        self.__notify = notifications()
Code example #22
	def point_extract():
		# Read the small-box template
		template = "sample_images/small_boxes"
		pick_box = extract(image, template)
Code example #23
for path in tqdm(glob(input_dir+'/*')):

    # My own convention: digicamtoy MCs don't have a field named
    # digicam_baseline, so the baseline has to be estimated here.
    use_digicam_baseline = 'toy' not in path

    outpath = path.replace(input_dir, out_dir)+'.h5'
    if not os.path.isfile(outpath):
        extract(
            input_file=path,
            output_file=outpath,
            use_digicam_baseline=use_digicam_baseline,
        )

dfs = []
for path in glob(out_dir+'/*'):

    df = estimate_darkcount_rate_max_min(path)
    df['method'] = 'max_min'
    df['path'] = path
    dfs.append(df)

    df = estimate_darkcount_rate_random_charge(path)
    df['method'] = 'random_charge'
    df['path'] = path
    dfs.append(df)
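The per-file results collected in dfs can then be combined into a single frame; a minimal sketch, assuming pandas is available (the output filename is illustrative):

import pandas as pd

all_results = pd.concat(dfs, ignore_index=True)
all_results.to_csv('darkcount_rates.csv', index=False)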
Code example #25
File: afile.py Project: al-layth/denigma
    def extract(self, folder='.'):
        self.name = extract(self.name, self.path)
        print("Filename of extracted file is %s" % self.name)
Code example #26
File: datasource.py Project: icostan/kaggle
def load_tfidf_data(data, encoder):
    x_q1 = e.extract(data, 'question1', encoder)
    x_q2 = e.extract(data, 'question2', encoder)
    data['tfidf_ratio'] = data.apply(
        normalized_tfidf, x_q1=x_q1, x_q2=x_q2, axis=1, raw=True)
Code example #28
from retrieve import getText
from preprocess import parse
from extraction import extract

article_url = "https://www.washingtonpost.com/news/the-switch/wp/2016/10/18/the-pentagons-massive-new-telescope-is-designed-to-track-space-junk-and-watch-out-for-killer-asteroids/"
summary = extract(getText(article_url), 3)

print("Summary -> \n")
for i in summary:
    print(i + "\n")
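Here extract(getText(article_url), 3) presumably returns the three top-ranked sentences of the article as a list of strings; the meaning of the second argument is an assumption, since the extraction module itself is not shown on this page.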
Code example #29
File: views.py Project: sofianhw/verifikatorc1
def extract(request):
    filename = request.GET.get("filename", "")
    output = extraction.extract(filename, settings.STATIC_DIR, path.join(settings.STATIC_DIR, 'extracted'),
                                settings.STATIC_DIR, load_config(request.GET.get("configFile")))
    return HttpResponse(output)
Code example #30
    def TargetSpectrum(self, Direction='y', **kwargs):
        logging.info('Opening of image : ' + str(self.image))
        inst = instru.telinst(self.image, verbose='')
        image_data = inst.Image(self.image)
        head = inst.header
        #name       = head['OBJECT'] + head['RECID']
        footprint = tb.Flip(image_data[self.y_start:self.y_end,
                                       self.x_start:self.x_end],
                            flip=self.flip)  # Select the footprint
        '''Loading frame from segmentation.fits'''
        if (self.mask):
            logging.info('Extracting frame from : ' + str(self.mask))
            seg = instru.telinst(self.mask, verbose='')
            m = seg.Image(self.mask)
            footprint_mask = tb.Flip(
                m[self.y_start:self.y_end, self.x_start:self.x_end],
                flip=self.flip)  # Flip footprint mask if m-1 order
        '''Subtract to the footprint a periodic electronic patterns in parallel direction'''
        profile = []
        if ((self.mask) and (self.isr == True)):
            logging.info("Doing ISR")
            Isr = isr.isr(self)
            profile = Isr.Mask(inst, Direction)
            if (len(profile) != 0):  # subtract periodic background
                footprint = tb.SubtractProfile(footprint, profile)
        '''Determine and write map of defects   '''
        '''if True, Correct for defective pixels'''
        if self.dispersion is True:
            # initialize flatfielding: needed both for DefectMap and calibrate
            flat = fd.flatfield(self, footprint, head, self.dispers)
            if (self.Map is True) and (flat.masterflat_list is not None):
                footprint = flat.divideByMap(footprint)
            else:
                logging.info("No flatfielding by map of defects")
        '''Extract a map of cosmics in the footprint. The footprint is
        written before removing cosmics when not doing aperture photometry.
        Cosmics in the footprint are also replaced with the median of the
        surrounding pixels.'''
        if ((self.mask) and (self.cosmic == True)):
            mean, sigma = tb.BkgdStat(footprint, footprint_mask)
            cosmics = ri.filters(plot=self.plot)
            cosmicimage = cosmics.Cosmics2(footprint, self.seeing, mean, sigma)
            hdu = pf.PrimaryHDU(cosmicimage)
            name = str("cosmics" + self.order + ".fits")
            hdu.writeto(os.path.join(self.out_dir, name), overwrite=True)
            logging.info(
                "Cosmics that are found are replaced with median of surrounding pixels "
            )

        hdu = pf.PrimaryHDU(footprint)
        name = str("footprint" + self.order + ".fits")
        hdu.writeto(os.path.join(self.out_dir, name), overwrite=True)
        '''Extract the raw profile. Two versions are tested: the first uses
        a constant aperture; the second uses a Gaussian fit to determine
        the center and width.'''
        logging.info("Spectrum extraction method : " + str(self.method))

        get = ex.extract(self, footprint, plot=self.plot)

        if (self.method == 0):
            get.flux(mode='both')
            self.pixel, self.aper_flux = get.pixel, get.aper_flux

        if (self.method == 1):
            self.pixel, self.aper_flux = get.ExtractSpectrum()
            #self.pixel, self.aper_flux = get.aperture()

        if (self.method == 2):
            '''First pass to determine center from gaussian fit'''
            pixel, aper_flux, center, flux, sigma =\
            get.ExtractSpectrum2( mode = 'psf',
                                  plot = self.plot)
            '''The trace of the centroid of the spectrum'''
            traceC, traceS = get.centroidSpectrum(pixel, plot=True)
            '''Now, ready to re-run the spectrum extraction task for aperture, and with better guess for the psf fitting'''
            self.pixel, self.aper_flux, center, psf_flux, sigma =\
            get.ExtractSpectrum2(mode = 'both',
                                  plot = self.plot,
                                  position = traceC,
                                  aperture = traceS)
            '''could measure it here instead of in method 1 only '''
            get.psf_voigt_flux = np.zeros(len(self.pixel))
            get.psf_voigt_fwhmL = np.zeros(len(self.pixel))
            get.psf_voigt_fwhmG = np.zeros(len(self.pixel))

        self.pixel = self.FramePos2RefPos(self.pixel)
        '''Determine dispersion relation'''
        if self.dispersion is True:
            self.wavelength = self.dispers.Pixel2Wavelength(self.pixel)
            '''calibration using synthetic flat'''
            if self.calibrate is True:
                flat.Calibrate(footprint, head)
        else:
            self.wavelength = np.zeros(len(self.pixel))
        '''Dump raw spectrum'''
        self.DumpSpectrum(
            head, ('pixel', 'w', 'aper_flux', 'psf_gauss_flux',
                   'psf_gauss_sigma', 'psf_gauss_mu', 'psf_moffat_flux',
                   'psf_moffat_x0', 'psf_moffat_gamma', 'integralGM',
                   'integralG', 'amplitude_0', 'x_0_0', 'gamma_0', 'alpha_0',
                   'amplitude_1', 'mean_1', 'stddev_1'),
            (self.pixel, self.wavelength, np.array(self.aper_flux),
             np.array(get.psf_gauss_flux), np.array(get.psf_gauss_sigma),
             np.array(get.psf_gauss_mu), np.array(get.psf_moffat_flux),
             np.array(get.psf_moffat_x0), np.array(get.psf_moffat_gamma),
             np.array(get.integralGM), np.array(
                 get.integralG), np.array(get.amplitude_0), np.array(
                     get.x_0_0), np.array(get.gamma_0), np.array(get.alpha_0),
             np.array(get.amplitude_1), np.array(
                 get.mean_1), np.array(get.stddev_1)),
            str("rawspectrum" + self.order + ".list"))

        return
Code example #31
File: wiktionary.py Project: vidraj/derinet
# Preprocessing and extraction
lexeme_data = dict()
with BZ2File(path) as xml_file:
    parser = ET.iterparse(xml_file)
    tkey = False
    for event, element in parser:
        # element.tag[43:] strips the '{...}' XML namespace prefix
        if (element.tag[43:] == 'title'):  # lexeme
            title = element.text
        # mark the page for saving if it is in the main namespace
        elif (element.tag[43:] == 'ns') and (element.text == '0'):
            tkey = True
        # save the information about the lexeme
        elif (element.tag[43:] == 'text') and (tkey is True):
            stat.add_active_lexeme()
            # extract the information for the lexeme
            data = extract(lang=par.l, data=element.text)
            if data is not None and data[1] != set():
                lexeme_data[title] = data
            tkey = False
        element.clear()  # free the element to keep memory bounded

# # Testing
# with open(file=par.o, mode='w', encoding='utf-8') as f:
#     f.write('Number: ' + str(len(lexeme_data)) + '\n')
#     for i,j in lexeme_data.items():
#         f.write(i + '\n' + str(j) + '\n\n')
# wordlist = set()
# relations = list()
# for child,data in lexeme_data.items():
#     wordlist.add(child)
Code example #32
    def post(self):
        '''
        Accept method: POST
        Header: Content-Type: application/json
        Runs the extraction for each document in the request body and
        returns the results as JSON.
        '''
        # logger.info("getRecommendation called")
        response = {
            "data": []
        }
        if self.request.method != 'POST':
            logger.error("getExtraction: Only accept POST request")
            response["status"] = BAD_REQUEST
            response["reason"] = "Only Accept POST request"
            print("Only Accept POST request")
            self.write(response)
            self.set_status(BAD_REQUEST)
        elif not self.request.headers['Content-Type'] == 'application/json':
            logger.error("getExtraction: Only accept Content-Type: application/json")
            response["status"] = BAD_REQUEST
            response["reason"] = "Only accept Content-Type: application/json"
            print("Only accept Content-Type: application/json")
            self.write(response)
            self.set_status(BAD_REQUEST)
        else:
            try:
                data = json_decode(self.request.body)
            except:
                logger.error(
                    'getExtraction: Content-Type should be application/json; expecting json data keys as: '
                    + str(parameters))
                response["status"] = BAD_REQUEST
                response["reason"] = ('Content-Type should be application/json; '
                                      'expecting json data keys as: ' + str(parameters))
                print('Content-Type should be application/json; expecting json data keys as: ' + str(parameters))
                self.write(response)
                self.set_status(BAD_REQUEST)
            else:
                data = dict((k.lower().strip(), v) for k, v in data.items())

                try:
                    for parameter in parameters_req:
                        if parameter not in data.keys():
                            raise Exception()
                except:
                    logger.error("getExtraction: Expecting keys as: " + str(parameters))
                    response["status"] = BAD_REQUEST
                    response["reason"] = 'Expecting keys as: ' + str(parameters)
                    print('Expecting keys as: ' + str(parameters))
                    self.write(response)
                    self.set_status(BAD_REQUEST)
                else:

                    documents = {}
                    for idata in data['data']:
                        if "documentId" not in idata or "filePath" not in idata or 'bankname' not in idata\
                                or 'params' not in idata or 'dataPath' not in idata:
                            logger.error("getExtraction: expect documentId, filePath, bankname, dataPath and params as key")
                            response["status"] = BAD_REQUEST
                            response["reason"] = "Expect  filePath, dataPath, bankname,  and params as key"
                            print("Expect documentId, filePath, bankname and params as key")
                            self.write(response)
                            self.set_status(BAD_REQUEST)
                        else:

                            try:
                                extractedData = extract(idata["filePath"], idata['dataPath'],
                                                        idata["bankname"], idata['params'],
                                                        idata['documentId'])
                                extractedData["status"] = SUCCESS
                                extractedData["error"] = ""
                                extractedData["documentId"] = idata["documentId"]

                                response["data"].append(extractedData)
                            except Exception as e:
                                extractedData = {}
                                extractedData["status"] = INTERNAL_SERVER_ERROR
                                extractedData["error"] = str(e)
                                extractedData["documentId"] = idata["documentId"]
                                response["data"].append(extractedData)


                    self.write(response)
                    self.set_status(SUCCESS)
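A hedged client-side sketch for exercising this handler. The endpoint URL, port, and field values are assumptions; only the JSON shape comes from the handler above:

import json
import requests

payload = {"data": [{
    "documentId": "doc-1",            # hypothetical values
    "filePath": "/tmp/statement.pdf",
    "dataPath": "/tmp/data",
    "bankname": "examplebank",
    "params": {},
}]}
resp = requests.post("http://localhost:8888/getExtraction",  # assumed endpoint
                     headers={"Content-Type": "application/json"},
                     data=json.dumps(payload))
print(resp.status_code, resp.json())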
Code example #33
    def TargetSpectrum(self, Direction='y', **kwargs):
        logging.info('Opening of image : ' + str(self.image))
        inst = instru.telinst(self.image, verbose='')
        image_data = inst.Image(self.image)
        head = inst.header
        name = head['OBJECT'] + head['RECID']
        saturation = np.max(image_data) * 0.8  # get saturation
        footprint = Flip(image_data[self.y_start:self.y_end,
                                    self.x_start:self.x_end],
                         flip=self.flip)  # Select the footprint
        '''Subtract profile'''
        profile = []
        if (self.mask):
            profile, footprint_mask = self.Mask(inst, Direction)
            if (len(profile) != 0):  # subtract periodic background
                footprint = self.SubtractProfile(footprint, profile)
        '''Determine and write map of defects   '''
        '''if True, Correct for defective pixels'''
        if ((self.Map is True) and (self.dispersion is True)):
            defect_map = self.BuildDefectMap()
            hdu = pf.PrimaryHDU(defect_map)
            name = str("defect_map" + self.order + ".fits")
            hdu.writeto(os.path.join(self.out_dir, name), overwrite=True)
            footprint = footprint / defect_map
        '''Write the footprint before removing cosmics when not doing aperture photometry'''
        if not self.aperture:
            hdu = pf.PrimaryHDU(footprint)
            name = str("footprint" + self.order + ".fits")
            hdu.writeto(os.path.join(self.out_dir, name), overwrite=True)
        '''Extract a map of cosmic in footprint'''
        if (self.mask):
            mean, sigma = tb.BkgdStat(footprint, footprint_mask)
            cosmics = ri.filters(plot=self.plot)
            cosmicimage = cosmics.Cosmics2(footprint, self.seeing, mean, sigma)
            hdu = pf.PrimaryHDU(cosmicimage)
            name = str("cosmics" + self.order + ".fits")
            hdu.writeto(os.path.join(self.out_dir, name), overwrite=True)
        '''Write the footprint after removing cosmics when doing aperture photometry'''
        if self.aperture:
            hdu = pf.PrimaryHDU(footprint)
            name = str("footprint" + self.order + ".fits")
            hdu.writeto(os.path.join(self.out_dir, name), overwrite=True)
        '''Extract the raw profile. Two versions are tested: the first uses
        a constant aperture; the second uses a Gaussian fit to determine
        the center and width.'''
        logging.info("Spectrum extraction method : " + str(self.method))

        get = ex.extract(self, footprint)

        if (self.method == 1):
            #self.pixel, self.aper_flux = self.ExtractSpectrum(footprint)
            self.pixel, self.aper_flux = get.ExtractSpectrum()

        if (self.method == 2):
            '''First pass to determine center from gaussian fit'''
            pixel, aper_flux, center, flux, sigma =\
            get.ExtractSpectrum2( mode = 'psf',
                                  plot = self.plot)
            '''The trace of the centroid of the spectrum'''
            traceC, traceS = get.centroidSpectrum(pixel, plot=True)
            '''Now, ready to re-run the spectrum extraction task for aperture, and with better guess for the psf fitting'''
            self.pixel, self.aper_flux, center, psf_flux, sigma =\
            get.ExtractSpectrum2(mode = 'both',
                                  plot = self.plot,
                                  position = traceC,
                                  aperture = traceS)
            '''could measure it here instead of in method 1 only '''
            get.psf_voigt_flux = np.zeros(len(self.pixel))
            get.psf_voigt_fwhmL = np.zeros(len(self.pixel))
            get.psf_voigt_fwhmG = np.zeros(len(self.pixel))

        self.pixel = self.FramePos2RefPos(self.pixel)
        '''Determine dispersion relation'''
        if self.dispersion is True:
            self.wavelength = self.Pixel2Wavelength(self.pixel)
            '''normalize using synthetic flat'''
            if self.calibrate is True:
                self.Calibrate(footprint, head)
        else:
            self.wavelength = np.zeros(len(self.pixel))
        '''Dump raw spectrum'''
        self.DumpSpectrum(
            head, ('pixel', 'w', 'aper_flux', 'psf_gauss_flux',
                   'psf_gauss_sigma', 'psf_gauss_mu', 'psf_voigt_flux',
                   'psf_voigt_fwhmL', 'psf_voigt_fwhmG'),
            (self.pixel, self.wavelength, np.array(self.aper_flux),
             np.array(get.psf_gauss_flux), np.array(get.psf_gauss_sigma),
             np.array(get.psf_gauss_mu), np.array(get.psf_voigt_flux),
             np.array(get.psf_voigt_fwhmL), np.array(get.psf_voigt_fwhmG)),
            str("rawspectrum" + self.order + ".list"))

        return