def test_is_instance(self, core_type_lib):
    assert PushInt.is_instance(5)
    assert PushInt.is_instance(np.int64(100))
    assert not PushInt.is_instance("Foo")
    assert not PushInt.is_instance(np.str_("Bar"))
    assert not PushStr.is_instance(5)
    assert not PushStr.is_instance(np.int64(100))
    assert PushStr.is_instance("Foo")
    assert PushStr.is_instance(np.str_("Bar"))
def makeHDF5(self, rewrite=False):
    """Create the HDF5 structure if needed ..."""
    print("Initialization of HDF5 file")
    if os.path.exists(self.hdf5) and rewrite:
        os.unlink(self.hdf5)
    spath = self.hdf5path.split("/")
    assert len(spath) > 2
    nxs = Nexus(self.hdf5, mode="w")
    entry = nxs.new_entry(entry=spath[0], program_name="pyFAI", title="diffmap")
    grp = entry
    for subgrp in spath[1:-2]:
        grp = nxs.new_class(grp, name=subgrp, class_type="NXcollection")
    processgrp = nxs.new_class(grp, "pyFAI", class_type="NXprocess")
    processgrp["program"] = numpy.array([numpy.str_(i) for i in sys.argv])
    processgrp["version"] = numpy.str_(PyFAI_VERSION)
    processgrp["date"] = numpy.str_(get_isotime())
    if self.mask:
        processgrp["maskfile"] = numpy.str_(self.mask)
    if self.flat:
        processgrp["flatfiles"] = numpy.array([numpy.str_(i) for i in self.flat])
    if self.dark:
        processgrp["darkfiles"] = numpy.array([numpy.str_(i) for i in self.dark])
    processgrp["inputfiles"] = numpy.array([numpy.str_(i) for i in self.inputfiles])
    processgrp["PONIfile"] = numpy.str_(self.poni)
    processgrp["dim0"] = self.npt_slow
    processgrp["dim0"].attrs["axis"] = "Rotation"
    processgrp["dim1"] = self.npt_fast
    processgrp["dim1"].attrs["axis"] = "Translation"
    processgrp["dim2"] = self.npt_rad
    processgrp["dim2"].attrs["axis"] = "Diffraction"
    for k, v in self.ai.getPyFAI().items():
        if "__len__" in dir(v):
            processgrp[k] = numpy.str_(v)
        elif v:
            processgrp[k] = v

    self.group = nxs.new_class(grp, name=spath[-2], class_type="NXdata")
    if posixpath.basename(self.hdf5path) in self.group:
        self.dataset = self.group[posixpath.basename(self.hdf5path)]
    else:
        self.dataset = self.group.create_dataset(
            name=posixpath.basename(self.hdf5path),
            shape=(self.npt_slow, self.npt_fast, self.npt_rad),
            dtype="float32",
            chunks=(1, self.npt_fast, self.npt_rad),
            maxshape=(None, None, self.npt_rad))
    self.dataset.attrs["signal"] = "1"
    self.dataset.attrs["interpretation"] = "spectrum"
    self.dataset.attrs["axes"] = str(self.unit).split("_")[0]
    self.dataset.attrs["creator"] = "pyFAI"
    self.dataset.attrs["long_name"] = "Diffraction imaging experiment"
    self.nxs = nxs
def find_cliche(self, datapath, filename):
    """
    Calculates the most common words (cliches) in the csv.
    Removes punctuation from the speech text, then uses TF-IDF
    to rank and extract the most common words.
    """
    data = self.common.read_csv(datapath, filename)
    ##speechtext = data.speechtext.str.replace(r'[^\w\s\,?]','')  # Removing all punctuation from speech text
    speechtext = data.speechtext.str.lower()
    # Use tf-idf to find words or tokens that are less important
    vectorizer = TfidfVectorizer(decode_error='replace', stop_words='english', encoding='utf-8')
    tfidf = vectorizer.fit_transform(speechtext.apply(lambda x: np.str_(x)))
    terms = vectorizer.get_feature_names()
    sums = tfidf.sum(axis=0)
    data = []
    for col, term in enumerate(terms):
        data.append((term, sums[0, col]))
    ranking = pd.DataFrame(data, columns=['term', 'rank'])
    cliches = ranking.sort_values('rank', ascending=False).nlargest(25, 'rank')
    found_cliches = cliches.term.values
    #print(found_cliches)
    return found_cliches
def test_string(self):
    lr = LogisticRegression()
    for col in ['features', u'features', np.str_('features')]:
        lr.setFeaturesCol(col)
        self.assertEqual(lr.getFeaturesCol(), 'features')
    self.assertRaises(TypeError, lambda: LogisticRegression(featuresCol=2.3))
def test_parse_to_string_convertible(self):
    try_to_convert = partial(self._try_to_convert, cv.utils.dumpString)
    for convertible in (None, '', 's', 'str', str(123), np.str('test1'), np.str_('test2')):
        expected = 'string: ' + (convertible if convertible else '')
        actual = try_to_convert(convertible)
        self.assertEqual(expected, actual,
                         msg=get_conversion_error_msg(convertible, expected, actual))
def testStringReduce2D(self):
    # Create a 2D array of strings
    x = np.asarray([["", "", "a", "", "", "b"],
                    ["", "c", "", "d", "", ""],
                    ["e", "", "f", "", "", ""]])
    self._compare(x, None, keepdims=False, zero=np.str_(""))
    self._compare(x, [], keepdims=False, zero=np.str_(""))
    self._compare(x, [0], keepdims=False, zero=np.str_(""))
    self._compare(x, [1], keepdims=False, zero=np.str_(""))
    self._compare(x, [0, 1], keepdims=False, zero=np.str_(""))
    self._compare(x, None, keepdims=True, zero=np.str_(""))
    self._compare(x, [], keepdims=True, zero=np.str_(""))
    self._compare(x, [0], keepdims=True, zero=np.str_(""))
    self._compare(x, [0, 1], keepdims=True, zero=np.str_(""))
def test_character_assignment(self):
    # This is an example of a function going through CopyObject, which
    # used to have an untested special path for scalars
    # (the character special dtype case; it should probably be deprecated).
    arr = np.zeros((1, 5), dtype="c")
    arr[0] = np.str_("asdfg")  # must assign as a sequence
    assert_array_equal(arr[0], np.array("asdfg", dtype="c"))
    assert arr[0, 1] == b"s"  # make sure not all were set to "a"
def entrainer(self, root):
    self.cv = TfidfVectorizer(min_df=1, stop_words='english')
    self.x_train = self.cv.fit_transform(
        self.df_x_train.apply(lambda x: np.str_(x)))
    self.x_test = self.cv.transform(
        self.df_x_test.apply(lambda x: np.str_(x)))
    self.mb = svm.SVC()
    try:
        self.mb.fit(self.x_train, self.df_y_train)
        print("training finished")
    except MemoryError:
        messagebox.showerror('Not enough memory',
                             message='Lower the percentage and try again')
        root.destroy()
def test___new___list_of_numpy(self):
    assert Vector([np.bool_(True)]).is_boolean()
    assert Vector([np.datetime64(DATE)]).is_datetime()
    assert Vector([np.datetime64(DATETIME)]).is_datetime()
    assert Vector([np.float_(0.5)]).is_float()
    assert Vector([np.int_(1)]).is_integer()
    assert Vector([np.object_(np)]).is_object()
    assert Vector([np.str_("")]).is_string()
def check_numpy_scalar_argument_return_string_2(self):
    f = PyCFunction('foo')
    f += Variable('a', 'npy_str', 'in, out')
    f += 'a.data[0] = \'H\';'
    foo = f.build()
    s = numpy.str_('hey')
    assert_equal(foo(s), 'Hey')
    assert_equal(s, 'hey')
def predictScore(self, reviews):
    # Clean and tokenize the reviews
    reviews = [self.trim(review) for review in reviews]
    print(reviews)
    tfidf = self.tfidftransformer.transform(
        self.vectorizer.transform([np.str_(review) for review in reviews]))
    pred = self.clf.predict_proba(tfidf)
    return pred
def setUp(self):
    self.b_lit = b'bytes literal'
    self.s_lit = 'literal literal'
    self.u_lit = u'unicode literal'
    self.np_b_lit = np.bytes_('numpy bytes literal')
    self.np_s_lit = np.str_('numpy unicode literal')
    self.np_u_lit = np.unicode_('numpy unicode literal')
def __init__(self, value=0):
    if self.__class__.__name__ == 'Scalar':
        raise TypeError("cannot create 'Scalar' instances")
    if self.__class__.__name__ == 'String':
        self._value = _N.str_(_ver.tostr(value))
        return
    if isinstance(value, _data.Data):
        value = value.data()
    self._value = _N.__dict__[self.__class__.__name__.lower()](value)
def save_data(fname, prob, npz_file=True, mat_file=True):
    # Remove file extension
    froot = os.path.splitext(fname)[0]

    # Get all OpenMDAO inputs and outputs into a dictionary
    var_dict = prob.model.list_inputs(values=True, prom_name=False, units=True, out_stream=None)
    out_dict = prob.model.list_outputs(values=True, prom_name=False, units=True, out_stream=None)
    var_dict.extend(out_dict)

    # Pickle the full archive so that we can load it back in if we need
    with open(froot + '.pkl', 'wb') as f:
        pickle.dump(var_dict, f)

    # Reduce to variables we can save for matlab or python
    if npz_file or mat_file:
        array_dict = {}
        for k in range(len(var_dict)):
            unit_str = var_dict[k][1]['units']
            if unit_str is None or unit_str == 'Unavailable':
                unit_str = ''
            elif len(unit_str) > 0:
                unit_str = '_' + unit_str

            iname = var_dict[k][0] + unit_str
            value = var_dict[k][1]['value']

            if type(value) in [type(np.array([])), type(0.0), type(0), np.float64, np.int64]:
                array_dict[iname] = value
            elif type(value) == type(True):
                array_dict[iname] = np.bool_(value)
            elif type(value) == type(''):
                array_dict[iname] = np.str_(value)
            elif type(value) == type([]):
                temp_val = np.empty(len(value), dtype=np.object)
                temp_val[:] = value[:]
                array_dict[iname] = temp_val
            #else:
            #    print(var_dict[k])

    # Save to numpy compatible
    if npz_file:
        kwargs = {key: array_dict[key] for key in array_dict.keys()}
        np.savez_compressed(froot + '.npz', **kwargs)
    # Save to matlab compatible
    if mat_file:
        sio.savemat(froot + '.mat', array_dict, long_field_names=True)
def __getitem__(self, key):
    return_grid = self._grid.copy()

    if isinstance(key[0], slice):
        start = key[0].start
        stop = key[0].stop
        for i, i_index in enumerate(self._flowstruct.outer_index):
            if i_index == start:
                start_i = i
            if i_index == stop:
                stop_i = i
        if stop_i < start_i:
            tmp = stop_i
            stop_i = start_i
            start_i = tmp
        outer_list = self._flowstruct.outer_index[start_i:stop_i + 1]
    elif isinstance(key[0], list):
        outer_list = key[0]
    else:
        outer_list = [key[0]]

    if isinstance(key[1], slice):
        start = key[1].start
        stop = key[1].stop
        for i, i_index in enumerate(self._flowstruct.inner_index):
            if i_index == start:
                start_i = i
            if i_index == stop:
                stop_i = i
        if stop_i < start_i:
            tmp = stop_i
            stop_i = start_i
            start_i = tmp
        inner_list = self._flowstruct.inner_index[start_i:stop_i + 1]
    elif isinstance(key[1], list):
        inner_list = key[1]
    else:
        inner_list = [key[1]]

    keys = list(itertools.product(outer_list, inner_list))
    if len(keys) > 1:
        keys = [key for key in keys if key in self._flowstruct.index]

    for k in keys:
        data = self._flowstruct[k]
        if len(set(outer_list)) < 2:
            k = k[1]
        return_grid.cell_arrays[np.str_(k)] = data.flatten()

    return return_grid
def main():
    path_bc03 = './'    # location of the BC03 .ised_ASCII files
    path_output = './'  # location of the output

    # define the bc03 SED file
    res = 'lr'        # resolution
    Z = '62'          # metallicity
    sfh = 'tau5'      # star formation history
    imf = 'chab'      # imf
    dust = '_dust00'  # dust
    sedfile = 'bc2003_' + res + '_m' + Z + '_' + imf + '_' + sfh + dust + '.ised_ASCII'
    outputfile = 'extracted_' + sedfile

    # extract the spectra from the .ised_ASCII. It will skip this if it finds a previously made
    # file extracted_<sedfile> in the folder specified by <path_output>
    if os.path.isfile(path_output + outputfile):
        print("Extracted file ", path_output + outputfile, " found. Good!")
    else:
        print("Extracted file ", path_output + outputfile, " does not yet exist. Building it...")
        process_ised_ascii_file(path_bc03, sedfile, path_output, outputfile)

    # read the extracted file. It will return:
    # nages : the number of ages for which there is a spectrum
    # specs_age : the list of ages of the spectra
    # nwavelengths : the number of wavelengths in each spectrum
    # specs_wavelengths : the list of wavelengths for each spectrum
    # specs_flux : an array where each column i corresponds to the fluxes of the ith age
    specs_age, specs_flux, specs_wavelength, nwavelengths, nages = import_spectra(
        path_output + outputfile)
    print("Extracted file ", path_output + outputfile, " read.")
    print("Number of age bins: ", len(specs_age))
    print("Number of wavelength bins: ", len(specs_wavelength))

    # select a spectrum of a certain age in years
    age_wanted = 1.e8  # for 100 Myr
    age_bin = find_closest_value(specs_age, age_wanted)
    age_myr = specs_age[age_bin][0] / 1.e6

    # this is your spectrum
    lam = specs_wavelength
    flux = (specs_flux[:, age_bin]).flatten()

    # plot the spectrum
    plt.figure()
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('Wavelength ($\\AA$)')
    plt.ylabel('$F_\\lambda$ (units)')
    title = sedfile
    age_info = "Age: " + np.str_(age_myr) + ' Myr'
    plt.plot(lam, flux)
    plt.text(1e4, 0.1 * np.max(flux), age_info, fontsize=20)
    plt.title(title)
    plt.show()
def transform(self, X, y=None, **params):
    X_ = X.copy()
    X_ = X_.explode("libelle_auteur")

    # Normalize libelle_auteur on specific author names
    def replace_batch_auteur(list_of_s: list, replace: str):
        for s in list_of_s:
            X_["libelle_auteur"] = X_["libelle_auteur"].apply(
                lambda x: x.replace(s, replace))

    replace_batch_auteur(
        ["M. Édouard Philippe", "M. Edouard Philippe", "M. Jean Castex"],
        "Gouvernement",
    )
    replace_batch_auteur(["Mme x", "M. XXX"], "Anonyme")

    # Add a slug column by removing accents and setting it lowercase
    X_["slug"] = X_["libelle_auteur"].apply(self._normalize_txt)

    # Try to merge with self.actors on several versions of the slugs
    va_merge_1 = X_.merge(self.actors, how="inner", left_on="slug", right_on="slug_1")
    va_merge_2 = X_.merge(self.actors, how="inner", left_on="slug", right_on="slug_2")
    va_merge_3 = X_.merge(self.actors, how="inner", left_on="slug", right_on="slug_3")
    # Special case with "Gouvernement", which is not in self.actors
    va_merge_4 = X_.merge(
        pd.DataFrame({
            "slug": ["gouvernement"],
            "membre_parti": ["Gouvernement"]
        }),
        on="slug",
    )

    # Merge all the joins together
    va_merge = va_merge_1.append(va_merge_2).append(va_merge_3).append(va_merge_4)
    va_merge.rename({"membre_parti": "auteur_parti"}, axis=1, inplace=True)

    # Reverse the explosion made over X, using a groupby.
    X_ = (va_merge.groupby("vote_uid")
          .agg({"auteur_parti": lambda x: x.tolist()})
          .reset_index())

    # Drop non-relevant columns
    X_ = X_[["vote_uid", "auteur_parti"]]
    # print(X_.head(5))
    # print(X.head(5))

    # Join with the original dataframe
    X = X.merge(X_, how="left", on="vote_uid")
    X["auteur_parti"].fillna("[NAN]", inplace=True)
    X["auteur_parti"] = X["auteur_parti"].apply(lambda x: np.str_(x))
    return X
def __init__(self, dataset):
    self.dataset = dataset
    self.additional_stopwords = ['rt']
    self.preprocess()
    self.vector = TfidfVectorizer(ngram_range=(1, 2),
                                  max_df=0.75,
                                  min_df=5,
                                  max_features=10000)
    self.tfidf = self.vector.fit_transform(
        dataset['processed_text'].apply(lambda x: np.str_(x)))
def get_TF_IDF(data):
    words = data['words'].apply(lambda x: np.str_(x))
    # CountVectorizer turns the text into a term-frequency matrix:
    # element a[i][j] is the frequency of word j in document class i
    vectorizer = CountVectorizer()
    # TfidfTransformer computes the tf-idf weight of every word
    transformer = TfidfTransformer()
    # The inner fit_transform builds the term-frequency matrix, the outer one computes tf-idf
    tfidf = transformer.fit_transform(vectorizer.fit_transform(words))
    # Extract the tf-idf matrix: element a[i][j] is the tf-idf weight of word j in document class i
    weight = tfidf.toarray()
    return weight, data['label']
def predict_input(input, df):
    vectorizer = CountVectorizer(ngram_range=(1, 2))
    vectorizer.fit_transform(df["lemma"].apply(lambda x: np.str_(x)))
    enc = vectorizer.transform([input])
    results = []
    for mod in MODELS:
        model = MODELS.get(mod)
        pred = model.predict(enc)[0]
        results.append([mod, pred])
    return results
def test_isscalar_numpy_array_scalars(self):
    self.assertTrue(lib.isscalar(np.int64(1)))
    self.assertTrue(lib.isscalar(np.float64(1.)))
    self.assertTrue(lib.isscalar(np.int32(1)))
    self.assertTrue(lib.isscalar(np.object_('foobar')))
    self.assertTrue(lib.isscalar(np.str_('foobar')))
    self.assertTrue(lib.isscalar(np.unicode_(u('foobar'))))
    self.assertTrue(lib.isscalar(np.bytes_(b'foobar')))
    self.assertTrue(lib.isscalar(np.datetime64('2014-01-01')))
    self.assertTrue(lib.isscalar(np.timedelta64(1, 'h')))
def test_isscalar_numpy_array_scalars(self):
    self.assertTrue(is_scalar(np.int64(1)))
    self.assertTrue(is_scalar(np.float64(1.)))
    self.assertTrue(is_scalar(np.int32(1)))
    self.assertTrue(is_scalar(np.object_('foobar')))
    self.assertTrue(is_scalar(np.str_('foobar')))
    self.assertTrue(is_scalar(np.unicode_(u('foobar'))))
    self.assertTrue(is_scalar(np.bytes_(b'foobar')))
    self.assertTrue(is_scalar(np.datetime64('2014-01-01')))
    self.assertTrue(is_scalar(np.timedelta64(1, 'h')))
def test_scalar_none_comparison(self):
    # Scalars should still just return False and not give a warning.
    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings('always', '', FutureWarning)
        assert_(not np.float32(1) == None)
        assert_(not np.str_('test') == None)
        # This is dubious (see below):
        assert_(not np.datetime64('NaT') == None)

        assert_(np.float32(1) != None)
        assert_(np.str_('test') != None)
        # This is dubious (see below):
        assert_(np.datetime64('NaT') != None)
    assert_(len(w) == 0)

    # For documentation purposes, this is why the datetime is dubious.
    # At the time of deprecation this was no behaviour change, but
    # it has to be considered when the deprecations are done.
    assert_(np.equal(np.datetime64('NaT'), None))
def test_isscalar_numpy_array_scalars(self):
    self.assertTrue(lib.isscalar(np.int64(1)))
    self.assertTrue(lib.isscalar(np.float64(1.0)))
    self.assertTrue(lib.isscalar(np.int32(1)))
    self.assertTrue(lib.isscalar(np.object_("foobar")))
    self.assertTrue(lib.isscalar(np.str_("foobar")))
    self.assertTrue(lib.isscalar(np.unicode_(u("foobar"))))
    self.assertTrue(lib.isscalar(np.bytes_(b"foobar")))
    self.assertTrue(lib.isscalar(np.datetime64("2014-01-01")))
    self.assertTrue(lib.isscalar(np.timedelta64(1, "h")))
def test_is_scalar_numpy_array_scalars(self):
    assert is_scalar(np.int64(1))
    assert is_scalar(np.float64(1.0))
    assert is_scalar(np.int32(1))
    assert is_scalar(np.object_("foobar"))
    assert is_scalar(np.str_("foobar"))
    assert is_scalar(np.unicode_("foobar"))
    assert is_scalar(np.bytes_(b"foobar"))
    assert is_scalar(np.datetime64("2014-01-01"))
    assert is_scalar(np.timedelta64(1, "h"))
def _ndarray_dtype(fields):
    """
    Return the NumPy structured array data type.

    Helper function.
    """
    return [(np.str_(key), values) for key, values in fields]
def predict(self, papers):
    """
    Generates predictions from the trained classifiers.
    Each binary classifier is applied once.

    Parameters
    ==========
    papers : pd.DataFrame
        Papers that we want to classify. Required column:
        tokens_baseline - previously tokenized title-abstract.

    Returns
    =======
    scores : pd.DataFrame
        Dataframe containing the predictions generated by each model.
        Each column corresponds to a review group and the values in that
        column are the probabilities that each paper belongs to that
        review group.
    """
    scores = {}
    tokenized_papers = list(papers[self.tokens_col])

    # get vectorizer and determine tfidf for papers
    vec = self.vectorizer
    X = vec.transform(tokenized_papers)

    if self.tokens_col2 is not None:
        tokenized_papers2 = papers[self.tokens_col2].apply(lambda x: np.str_(x))
        # get vectorizer and determine tfidf for papers
        vec2 = self.vectorizer2
        X2 = vec2.transform(tokenized_papers2)
        X = hstack([X, X2])

    for model_group in tqdm(self.models, desc='Test Review Groups'):
        # get the classifier
        classifier = self.models[model_group]
        # predictions as probabilities
        y_preds = classifier.predict_proba(X)
        probabilities = y_preds[:, 1]
        # store scores of model
        scores[model_group] = probabilities

    scores = pd.DataFrame.from_dict(scores)
    return scores
def sample_factory(freq='D', start_date='2001-01-01', value_generator=np.random.normal,
                   timeseries_prefix="T", array_prefix="A", scalar_prefix="S",
                   missing_prefix="M", number_of_arrays=20, number_of_timeseries=26,
                   number_of_missings=5, number_of_scalars=15, length_of_data=100,
                   dataset=None):
    if not dataset:
        dataset = InformationSet()
    M = max(number_of_timeseries, number_of_arrays, number_of_missings)
    fmt = "%s%0" + str(len(str(M - 1))) + "d"

    # Timeseries
    for i in range(number_of_timeseries):
        Sname = fmt % (timeseries_prefix, i)
        np.random.seed(i)
        values = value_generator(0, 1, length_of_data)
        per = period_range(start_date, periods=len(values), freq=freq)
        series = Series(values, index=per)
        dataset[Sname] = series

    # Arrays
    for i in range(number_of_arrays):
        Sname = fmt % (array_prefix, i)
        np.random.seed(i)
        values = value_generator(0, 1, length_of_data)
        series = Series(values)
        dataset[Sname] = series

    # Scalars
    for i in range(number_of_scalars):
        Sname = fmt % (scalar_prefix, i)
        np.random.seed(i)
        if i % 2:
            values = np.float_(value_generator(0, 1, 1))
        else:
            values = np.str_(value_generator(0, 1, 1))
        series = Series(values)
        dataset[Sname] = series

    # Also add some missings
    if number_of_missings > 0:
        dataset.add_missing(*[fmt % (missing_prefix, t) for t in range(number_of_missings)])

    return dataset
def get_vectorizer(column, X, ngram_range, tokenizer=False):
    if tokenizer:
        vectorizer = TfidfVectorizer(max_features=4000,
                                     stop_words='english',
                                     ngram_range=ngram_range,
                                     tokenizer=tokenize)
    else:
        vectorizer = TfidfVectorizer(max_features=4000, ngram_range=ngram_range)
    vectorizer.fit(X[column].apply(lambda x: np.str_(x)))
    return vectorizer
def __str__(self):
    ret_str = str(self.x_size) + " x " + str(self.y_size) + " simulation grid:"
    for y in range(self.y_size):
        ret_str += "\n["
        for x in range(self.x_size):
            if x > 0:
                ret_str += ", "
            ret_str += numpy.str_(str(round(self.grid[x, y].state)))
        ret_str += ']'
    return ret_str
def test_scalar_none_comparison(self):
    # Scalars should still just return False and not give a warning.
    # The comparisons are flagged by pep8, ignore that.
    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings("always", "", FutureWarning)
        assert_(not np.float32(1) == None)
        assert_(not np.str_("test") == None)
        # This is dubious (see below):
        assert_(not np.datetime64("NaT") == None)

        assert_(np.float32(1) != None)
        assert_(np.str_("test") != None)
        # This is dubious (see below):
        assert_(np.datetime64("NaT") != None)
    assert_(len(w) == 0)

    # For documentation purposes, this is why the datetime is dubious.
    # At the time of deprecation this was no behaviour change, but
    # it has to be considered when the deprecations are done.
    assert_(np.equal(np.datetime64("NaT"), None))
def convert2np(x):
    if x == "nan":
        return np.nan
    elif type(x) is float:
        return np.float64(x)
    elif type(x) is int:
        return np.int64(x)
    elif type(x) is str:
        return np.str_(x)
    else:
        return x
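A minimal usage sketch for convert2np above; the sample values are illustrative and do not come from the source.

# Hypothetical inputs: mixed Python scalars converted to NumPy scalar types
samples = ["nan", 3.14, 7, "text", [1, 2]]
converted = [convert2np(v) for v in samples]
# -> [nan, np.float64(3.14), np.int64(7), np.str_('text'), [1, 2]]
# (anything that is not a float, int, str, or the string "nan" passes through unchanged)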
def _serialize_tensor_value(
        value: Any, type_spec: computation_types.TensorType
) -> Tuple[executor_pb2.Value, computation_types.TensorType]:
    """Serializes a tensor value into `executor_pb2.Value`.

    Args:
        value: A Numpy array or other object understood by `tf.make_tensor_proto`.
        type_spec: A `tff.TensorType`.

    Returns:
        A tuple `(value_proto, ret_type_spec)` in which `value_proto` is an
        instance of `executor_pb2.Value` with the serialized content of `value`,
        and `ret_type_spec` is the type of the serialized value. The
        `ret_type_spec` is the same as the argument `type_spec` if that argument
        was not `None`. If the argument was `None`, `ret_type_spec` is a type
        determined from `value`.

    Raises:
        TypeError: If the arguments are of the wrong types.
        ValueError: If the value is malformed.
    """
    original_value = value
    if tf.is_tensor(value):
        if isinstance(value, tf.Variable):
            value = value.read_value()
        if tf.executing_eagerly():
            value = value.numpy()
        else:
            # Attempt to extract the value using the current graph context.
            with tf.compat.v1.Session() as sess:
                value = sess.run(value)
    # If we got a string or bytes scalar, wrap it in numpy so it has a dtype and
    # shape.
    if isinstance(value, bytes):
        value = np.bytes_(value)
    elif isinstance(value, str):
        value = np.str_(value)
    else:
        value = np.asarray(value)
    if not tf.TensorShape(value.shape).is_compatible_with(type_spec.shape):
        raise TypeError(f'Cannot serialize tensor with shape {value.shape} to '
                        f'shape {type_spec.shape}.')
    if value.dtype != type_spec.dtype.as_numpy_dtype:
        try:
            value = value.astype(type_spec.dtype.as_numpy_dtype, casting='same_kind')
        except TypeError as te:
            value_type_string = py_typecheck.type_string(type(original_value))
            raise TypeError(
                f'Failed to serialize value of Python type {value_type_string} to '
                f'a tensor of type {type_spec}.\nValue: {original_value}') from te
    value_proto = _value_proto_for_np_array(value, type_spec)
    return value_proto, type_spec
def test_scalar_comparison_to_none(self):
    # Scalars should just return False and not give a warning.
    # The comparisons are flagged by pep8, ignore that.
    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings("always", "", FutureWarning)
        assert_(not np.float32(1) == None)
        assert_(not np.str_("test") == None)
        # This is dubious (see below):
        assert_(not np.datetime64("NaT") == None)

        assert_(np.float32(1) != None)
        assert_(np.str_("test") != None)
        # This is dubious (see below):
        assert_(np.datetime64("NaT") != None)
    assert_(len(w) == 0)

    # For documentation purposes, this is why the datetime is dubious.
    # At the time of deprecation this was no behaviour change, but
    # it has to be considered when the deprecations are done.
    assert_(np.equal(np.datetime64("NaT"), None))
def test_astype(self):
    import numpy as np
    a = np.bool_(True).astype(np.float32)
    assert type(a) is np.float32
    assert a == 1.0
    a = np.bool_(True).astype('int32')
    assert type(a) is np.int32
    assert a == 1
    a = np.str_('123').astype('int32')
    assert type(a) is np.int32
    assert a == 123
def test_ends_with(self):
    run_test_ends_with(self.strings, self.test_strings, self.delim)
    run_test_ends_with(self.strings, self.test_strings, np.str_(self.delim))
    run_test_ends_with(self.strings, self.test_strings, str.encode(str(self.delim)))

    # Test gremlins delimiters
    g = self._get_ak_gremlins()
    run_test_ends_with(g.gremlins_strings, g.gremlins_test_strings, ' ')
    run_test_ends_with(g.gremlins_strings, g.gremlins_test_strings, '"')
    with self.assertRaises(AssertionError):
        self.assertFalse(run_test_ends_with(g.gremlins_strings, g.gremlins_test_strings, ''))
def predictSpam():
    if request.method == 'POST':
        text = str(request.form['txtmessage'])
        if text:
            df = prep.create_dataframe(text)
            df.drop(['text'], inplace=True, axis=1)
            #input_vectors = vec.create_vectors(df)
            x_vectors = x_vector.transform(
                df['cleaned_text'].apply(lambda x: np.str_(x)))
            # combine features
            selected_features = df.columns[1:]
            feature_set1 = df[selected_features]
            # convert the pandas frame feature_set1 to compressed sparse notation
            input_vectors = hstack([x_vectors, csr_matrix(feature_set1)], "csr")
            #vectors = X_vector.fit_transform(df).toarray()
            prediction = model.predict(input_vectors)
            output = round(prediction[0])
        if not text:
            return render_template(
                'index.html',
                prediction_text="Sorry, input is required for analysis!")
        else:
            '''
            if output == 0:
                return jsonify({output: "This is a legit message"})
            else:
                return jsonify({output: "This is a spam message"})
            '''
            if output == 0:
                return render_template(
                    'index.html',
                    prediction_text="This is a legit message! Model Predicted Value = " + str(output))
            else:
                return render_template(
                    'index.html',
                    prediction_text="This is spam. Be careful! Model Predicted Value = " + str(output))
    else:
        return render_template('index.html')
def test_builtin(self):
    import numpy as np
    assert int(np.str_("12")) == 12
    exc = raises(ValueError, "int(np.str_('abc'))")
    assert exc.value.message.startswith("invalid literal for int()")
    assert int(np.uint64((2 << 63) - 1)) == (2 << 63) - 1
    exc = raises(ValueError, "int(np.float64(np.nan))")
    assert str(exc.value) == "cannot convert float NaN to integer"
    exc = raises(OverflowError, "int(np.float64(np.inf))")
    assert str(exc.value) == "cannot convert float infinity to integer"
    assert int(np.float64(1e100)) == int(1e100)
    assert long(np.float64(1e100)) == int(1e100)
    assert int(np.complex128(1e100 + 2j)) == int(1e100)
    exc = raises(OverflowError, "int(np.complex64(1e100+2j))")
    assert str(exc.value) == "cannot convert float infinity to integer"
    assert int(np.str_("100000000000000000000")) == 100000000000000000000
    assert long(np.str_("100000000000000000000")) == 100000000000000000000

    assert float(np.float64(1e100)) == 1e100
    assert float(np.complex128(1e100 + 2j)) == 1e100
    assert float(np.str_("1e100")) == 1e100
    assert float(np.str_("inf")) == np.inf
    assert str(float(np.float64(np.nan))) == "nan"

    assert oct(np.int32(11)) == "013"
    assert oct(np.float32(11.6)) == "013"
    assert oct(np.complex64(11 - 12j)) == "013"
    assert hex(np.int32(11)) == "0xb"
    assert hex(np.float32(11.6)) == "0xb"
    assert hex(np.complex64(11 - 12j)) == "0xb"
    assert bin(np.int32(11)) == "0b1011"
    exc = raises(TypeError, "bin(np.float32(11.6))")
    assert "index" in exc.value.message
    exc = raises(TypeError, "len(np.int32(11))")
    assert "has no len" in exc.value.message
    assert len(np.string_("123")) == 3
def now_name(name_base, name_extension, time_step=1e6):
    """
    Insert a time stamp into the file name before the .extension

    Args:
        name_base: file name first part - may include directory path
        name_extension: file extension without a period
        time_step: minimum time between two time stamps

    Returns:
        time_stamped_file_name: concatenation of the inputs with time-stamp
    """
    nstr = np.str_(int(time.time() * time_step))
    time_stamped_file_name = name_base + '_' + nstr + '.' + name_extension
    return time_stamped_file_name
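A short usage sketch for now_name above; the base name and extension are made-up examples, not taken from the source.

# Hypothetical call: with the default time_step of 1e6 the stamp has microsecond
# resolution, so this yields something like 'results_1712345678901234.csv'
stamped = now_name('results', 'csv')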
def test_dataframe_roundtrip(self):
    if self.should_skip:
        return self.skip('pandas is not importable')
    df = pd.DataFrame({
        'an_int': np.int_([1, 2, 3]),
        'a_float': np.float_([2.5, 3.5, 4.5]),
        'a_nan': np.array([np.nan] * 3),
        'a_minus_inf': np.array([-np.inf] * 3),
        'an_inf': np.array([np.inf] * 3),
        'a_str': np.str_('foo'),
        'a_unicode': np.unicode_('bar'),
        'date': np.array([np.datetime64('2014-01-01')] * 3),
        'complex': np.complex_([1 - 2j, 2 - 1.2j, 3 - 1.3j]),
        # TODO: the following dtypes are not currently supported.
        # 'object': np.object_([{'a': 'b'}]*3),
    })
    decoded_df = self.roundtrip(df)
    assert_frame_equal(decoded_df, df)
def test_series_roundtrip(self):
    if self.should_skip:
        return self.skip('pandas is not importable')
    ser = pd.Series({
        'an_int': np.int_(1),
        'a_float': np.float_(2.5),
        'a_nan': np.nan,
        'a_minus_inf': -np.inf,
        'an_inf': np.inf,
        'a_str': np.str_('foo'),
        'a_unicode': np.unicode_('bar'),
        'date': np.datetime64('2014-01-01'),
        'complex': np.complex_(1 - 2j),
        # TODO: the following dtypes are not currently supported.
        # 'object': np.object_({'a': 'b'}),
    })
    decoded_ser = self.roundtrip(ser)
    assert_series_equal(decoded_ser, ser)
def test_generic_roundtrip(self):
    values = [
        np.int_(1),
        np.int32(-2),
        np.float_(2.5),
        np.nan,
        -np.inf,
        np.inf,
        np.datetime64('2014-01-01'),
        np.str_('foo'),
        np.unicode_('bar'),
        np.object_({'a': 'b'}),
        np.complex_(1 - 2j),
    ]
    for value in values:
        decoded = self.roundtrip(value)
        assert_equal(decoded, value)
        self.assertTrue(isinstance(decoded, type(value)))
def test_multindex_dataframe_roundtrip(self):
    if self.should_skip:
        return self.skip('pandas is not importable')
    df = pd.DataFrame({
        'idx_lvl0': ['a', 'b', 'c'],
        'idx_lvl1': np.int_([1, 1, 2]),
        'an_int': np.int_([1, 2, 3]),
        'a_float': np.float_([2.5, 3.5, 4.5]),
        'a_nan': np.array([np.nan] * 3),
        'a_minus_inf': np.array([-np.inf] * 3),
        'an_inf': np.array([np.inf] * 3),
        'a_str': np.str_('foo'),
        'a_unicode': np.unicode_('bar'),
    })
    df = df.set_index(['idx_lvl0', 'idx_lvl1'])
    decoded_df = self.roundtrip(df)
    assert_frame_equal(decoded_df, df)
def test_generic_roundtrip(self):
    if self.should_skip:
        return self.skip("numpy is not importable")
    values = [
        np.int_(1),
        np.int32(-2),
        np.float_(2.5),
        np.nan,
        -np.inf,
        np.inf,
        np.datetime64("2014-01-01"),
        np.str_("foo"),
        np.unicode_("bar"),
        np.object_({"a": "b"}),
        np.complex_(1 - 2j),
    ]
    for value in values:
        decoded = self.roundtrip(value)
        assert_equal(decoded, value)
        self.assertTrue(isinstance(decoded, type(value)))
def decode_table(data, encoding='ascii', native=True):
    '''
    Decode byte strings in a table into unicode strings.

    Args:
        data : numpy structured array or astropy Table

    Options:
        encoding : encoding to use for converting bytes into unicode;
            default 'ascii'; if None, try ENCODING keyword in data instead
        native : if True (default), only decode if native str type is unicode
            (i.e. python3 but not python2)

    Note:
        the `encoding` option overrides data.meta['ENCODING'];
        use encoding=None to use data.meta['ENCODING'] instead
    '''
    from astropy.table import Table
    import numpy as np

    try:
        table = Table(data, copy=False)
    except ValueError:
        #- https://github.com/astropy/astropy/issues/5298
        table = Table(data, copy=True)

    #- Check if native str type is bytes
    if native and np.str_('a').dtype.kind == 'S':
        return table

    encoding = _pick_encoding(table, encoding)

    for col in table.colnames:
        dtype = table[col].dtype
        if dtype.kind == 'S':
            Un = 'U{}'.format(_dtype_size(dtype))
            table.replace_column(col, np.char.decode(table[col], encoding=encoding).astype(Un))

    table.meta['ENCODING'] = encoding
    return table
Creator: Phil Bentley
"""
__version_info__ = (0, 0, 8, 'beta', 0)
__version__ = "%d.%d.%d-%s" % __version_info__[0:4]

import sys, os, logging, types

import ply.lex as lex
from ply.lex import TOKEN
import ply.yacc as yacc
import netCDF4 as nc4
import numpy as np

# default fill values for netCDF-3 data types (as defined in the netcdf.h include file)
NC_FILL_BYTE = np.int8(-127)
NC_FILL_CHAR = np.str_('\0')
NC_FILL_SHORT = np.int16(-32767)
NC_FILL_INT = np.int32(-2147483647)
NC_FILL_FLOAT = np.float32(9.9692099683868690e+36)  # should get rounded to 9.96921e+36
NC_FILL_DOUBLE = np.float64(9.9692099683868690e+36)

# miscellaneous constants as defined in the ncgen3.l file
FILL_STRING = "_"
XDR_INT_MIN = -2147483648
XDR_INT_MAX = 2147483647

# netcdf to numpy data type map
NC_NP_DATA_TYPE_MAP = {
    'byte': 'b',
    'char': 'c',
    'short': 'h',
        # crop to 100x100
        result = result.crop((50, 50, 150, 150))

        # random brightness
        enhancer = ImageEnhance.Brightness(result)
        result = enhancer.enhance(random.random() + 0.5)

        # save result to HDF5 DB
        dset = f.create_dataset('%07d' % x, (100, 100), dtype='uint8')
        dset[...] = np.array(result)

        # update mean
        datasetMean += dset[...].astype('double') / N

        # set attributes for grayscale images
        dset.attrs['CLASS'] = np.str_('IMAGE')
        dset.attrs['VERSION'] = np.str_('1.2')
        dset.attrs['IMAGE_SUBCLASS'] = np.str_('IMAGE_GRAYSCALE')
        dset.attrs['IMAGE_WHITE_IS_ZERO'] = np.uint8(0)

        # save attributes for training
        dset.attrs['HAS_SPHERE'] = np.uint8(hasSphere)
        if (hasSphere):
            dset.attrs['RADIUS'] = np.float(sphereDiameter / 2)
            dset.attrs['CENTER_X'] = np.float(sphereCenter[0] - 50)
            dset.attrs['CENTER_Y'] = np.float(sphereCenter[1] - 50)
    except IOError as e:
        print('I/O Error(%d): %s' % (e.errno, e.strerror))

    x += 1
for item in feat_lst:
    attr = item['name'].split('/')[-2].split('_')[0].split('-')[-1]
    video = item['name'].split('/')[-2].split('_')[1]
    frame = item['frame']

    # feature -> scores
    feature_key = '_'.join([attr, video, frame])
    try:
        score_key = '_'.join([attr, video, index.data[feature_key]])
        # The score
        score = prof.data[tracker_type][score_key]
        # The feature
        feat_name = feature_dir + feat_type + '/' + attr + '_' + video + '-' + frame + '.' + feat_type
        feature = _upack_feature_(feat_name)
        svm_feature = _convert_svm_format_(feature)
        # Gen_svm
        line = np.str_(score[0]) + ' ' + svm_feature + '\n'
        output_list.append(line)
        output.write(line)
        count = count + 1
    except:
        pass
    #break

print 'Done,', count, 'features saved in', output_name
output.close()

output_train = open(output_name + '_train', 'w+')
output_test = open(output_name + '_test', 'w+')
shuffle(output_list)
    np.bool_().dtype.num: 'bit',
    np.uint8().dtype.num: 'unsignedByte',
    np.int16().dtype.num: 'short',
    np.int32().dtype.num: 'int',
    np.int64().dtype.num: 'long',
    np.complex64().dtype.num: 'floatComplex',
    np.complex128().dtype.num: 'doubleComplex',
    np.unicode_().dtype.num: 'unicodeChar'
}

# numpy 1.4.1 doesn't have a "bytes_" type
if hasattr(np, 'bytes_'):
    numpy_dtype_to_field_mapping[np.bytes_().dtype.num] = 'char'
else:
    numpy_dtype_to_field_mapping[np.str_().dtype.num] = 'char'


def _all_bytes(column):
    for x in column:
        if not isinstance(x, bytes):
            return False
    return True


def _all_unicode(column):
    for x in column:
        if not isinstance(x, unicode):
            return False
    return True
            abaixo = get_pixel(img, x, y+1)

            values = limiar(centro, [acima_esquerda, acima, acima_direita, direta,
                                     abaixo_direita, abaixo, abaixo_esquerda, esquerda])
            weights = [1, 2, 4, 8, 16, 32, 64, 128]
            res = 0
            for a in range(0, values.__len__()):
                res += weights[a] * values[a]
            img_lbp[x, y] = res
    return img_lbp


# Selecting the images
n_query = 2  # 1 - 2
n_image = 8  # 1 - 8

img_aux_1 = cv2.imread('../imagens/query_' + str_(n_query) + '.jpg', 0)
img_aux_2 = cv2.imread('../imagens/query_' + str_(n_query) + '.jpg', 0)
img_aux_2 = cv2.imread('../imagens/texture_sample_' + str_(n_image) + '.jpg', 0)
img_lbp = cv2.imread('../imagens/query_' + str_(n_query) + '.jpg', 0)
amostra_lbp = cv2.imread('../imagens/texture_sample_' + str_(n_image) + '.jpg', 0)

img_lbp = get_lbp(img_aux_1, img_lbp)
amostra_lbp = get_lbp(img_aux_2, amostra_lbp)

plt.subplot(121), plt.imshow(img_lbp, cmap='gray')
plt.title('LBP Query ' + str_(n_query)), plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(amostra_lbp, cmap='gray')
plt.title('LBP Sample ' + str_(n_image)), plt.xticks([]), plt.yticks([])
plt.show()
def test_numpy_dtype_string():
    """Test Python2/3 string compatibility with Python data type"""
    np.empty(1, dtype=[(np.str_('a'), '?')])
def test_string_boxes(self):
    from numpy import str_
    assert isinstance(str_(3), str_)
    assert str_(3) == '3'
    assert str(str_(3)) == '3'
    assert repr(str_(3)) == "'3'"
def test_numpy_str(self):
    array = self.h5file.get_node(numpy.str_('/'), numpy.str_('a'))
    self.assertEqual(array.shape, (3, 1))
def test_accepts_numpy_string(self):
    numpy_string = numpy.str_("this is a numpy string!")
    a = A()
    a.string = numpy_string
    self.assertEqual(a.string, numpy_string)
    self.assertIs(type(a.string), str)
        return pd.CategoricalIndex(data, categories=cats,
                                   ordered=idx.ordered, name=idx.name)
    elif typ is pd.MultiIndex:
        levels = [_nonempty_index(i) for i in idx.levels]
        labels = [[0, 0] for i in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels, names=idx.names)
    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))


_simple_fake_mapping = {
    'b': np.bool_(True),
    'V': np.void(b' '),
    'M': np.datetime64('1970-01-01'),
    'm': np.timedelta64(1),
    'S': np.str_('foo'),
    'a': np.str_('foo'),
    'U': np.unicode_('foo'),
    'O': 'foo'
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ('i', 'f', 'u'):
        return dtype.type(1)
    elif dtype.kind == 'c':
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
        o = _simple_fake_mapping[dtype.kind]
        return o.astype(dtype) if dtype.kind in ('m', 'M') else o
    else: