Beispiel #1
0
 def handle_data(self, data):
     """Route a chunk of text to the field named by the top of ``self.flag``.

     ``self.flag`` is used as a stack of pending field names.  When its last
     entry is one of the names handled below, ``data`` is stored in the
     matching attribute and that entry is popped.  An empty stack or an
     unrecognised top entry leaves everything untouched.
     """
     if not self.flag:
         return

     pending = self.flag[-1]
     if pending == "span":
         # Span text accumulates across calls.
         self.span += data
     elif pending == "strong":
         # Kept until the total-price branch consumes it.
         self.strong = data
     elif pending == "houseName":
         self.houseName.append(str(strip(data)))
     elif pending == "villageName":
         self.villageName.append(str(strip(data)))
     elif pending == "houseTotlePrice_2":
         # The total price is the remembered <strong> text plus this chunk.
         self.houseTotlePrice.append(self.strong + data)
         self.strong = ""
     elif pending == "houseUnitPrice":
         self.houseUnitPrice.append(data)
     else:
         # Not a field handled here: leave the stack as it is.
         return

     # Every handled field consumes exactly one stack entry.
     self.flag.pop()
Beispiel #2
0
 def ejecutar(self, tabla, arbol):
     """Evaluate the TRIM operand and return it stripped.

     The operand is evaluated first; an ``Excepcion`` result is returned
     unchanged.  For character-like operand types the node's type becomes
     TEXT and ``strip(str(resultado))`` is returned.  For any other type a
     42883 semantic error is recorded on ``arbol`` and returned.
     """
     super().ejecutar(tabla, arbol)

     resultado = self.valor.ejecutar(tabla, arbol)
     if isinstance(resultado, Excepcion):
         return resultado

     tipos_de_texto = (
         Tipo_Dato.CHAR,
         Tipo_Dato.VARCHAR,
         Tipo_Dato.VARYING,
         Tipo_Dato.CHARACTER,
         Tipo_Dato.TEXT,
     )
     if self.valor.tipo.tipo in tipos_de_texto:
         self.tipo = Tipo(Tipo_Dato.TEXT)
         return strip(str(resultado))

     # Unsupported operand type: log the error and a hint, then return it.
     mensaje = f"No existe la función TRIM({self.valor.tipo.toString()})"
     error = Excepcion('42883', "Semántico", mensaje, self.linea, self.columna)
     arbol.excepciones.append(error)
     arbol.consola.append("HINT: Ninguna función coincide en el nombre y tipos de argumentos. Puede ser necesario agregar conversión explícita de tipos.")
     arbol.consola.append(error.toString())
     return error
Beispiel #3
0
def remove_cols(data, skip_cols):
	"""Convert the columns of ``data`` to int16, leaving out ``skip_cols``.

	Each column whose zero-based position is not in ``skip_cols`` is passed
	through ``strip(col, '"')`` and four ``replace`` calls ('' / 'NA' /
	'false' become '0', 'true' becomes '1'), then cast with
	``astype(int16)``.  Progress is printed and the garbage collector is
	run every 200 columns.

	Returns ``array(...)`` built from the converted columns.
	"""
	substitutions = (('', '0'), ('NA', '0'), ('false', '0'), ('true', '1'))
	kept = []
	for position, column in enumerate(data):
		if position % 200 == 0:
			print('processing column {0:d}...'.format(position))
			gc.collect()
		if position in skip_cols:
			continue
		# Rebind the same name so intermediate arrays can be freed early.
		column = strip(column, '"')
		for old, new in substitutions:
			column = replace(column, old, new)
		kept.append(column.astype(int16))
	gc.collect()
	return array(kept)
Beispiel #4
0
def remove_cols(data, skip_cols):
    """Convert the columns of ``data`` to int16, leaving out ``skip_cols``.

    Each column whose zero-based position is not in ``skip_cols`` is passed
    through ``strip(col, '"')`` and four ``replace`` calls ('' / 'NA' /
    'false' become '0', 'true' becomes '1'), then cast with
    ``astype(int16)``.  Progress is printed and the garbage collector is
    run every 200 columns.

    Returns ``array(...)`` built from the converted columns.
    """
    substitutions = (('', '0'), ('NA', '0'), ('false', '0'), ('true', '1'))
    kept = []
    for position, column in enumerate(data):
        if position % 200 == 0:
            print('processing column {0:d}...'.format(position))
            gc.collect()
        if position in skip_cols:
            continue
        # Rebind the same name so intermediate arrays can be freed early.
        column = strip(column, '"')
        for old, new in substitutions:
            column = replace(column, old, new)
        kept.append(column.astype(int16))
    gc.collect()
    return array(kept)
Beispiel #5
0
 def ejecutar(self, tabla, arbol):
     """Apply TRIM to a primitive operand of a character-like type.

     For a ``Primitivo`` operand whose type is CHAR, VARCHAR, VARYING,
     CHARACTER or TEXT, returns ``strip(str(self.valor.valor))``.  An
     ``Identificador`` operand is not implemented: a notice is printed.
     In every other case a 42883 semantic error is recorded on ``arbol``
     and returned.
     """
     super().ejecutar(tabla, arbol)

     tipos_de_texto = (
         Tipo_Dato.CHAR,
         Tipo_Dato.VARCHAR,
         Tipo_Dato.VARYING,
         Tipo_Dato.CHARACTER,
         Tipo_Dato.TEXT,
     )
     if isinstance(self.valor, Primitivo):
         if self.valor.tipo.tipo in tipos_de_texto:
             return strip(str(self.valor.valor))
     elif isinstance(self.valor, Identificador):
         print("FALTA PROGRAMAR PARA IDENTIFICADOR TRIM")

     # Anything reaching this point is reported as an unknown TRIM overload.
     mensaje = f"No existe la función TRIM({self.valor.tipo.toString()})"
     error = Excepcion('42883', "Semántico", mensaje, self.linea, self.columna)
     arbol.excepciones.append(error)
     arbol.consola.append(
         "HINT: Ninguna función coincide en el nombre y tipos de argumentos. Puede ser necesario agregar conversión explícita de tipos."
     )
     arbol.consola.append(error.toString())
     return error
Beispiel #6
0
def to_ints_only(data):
    """Keep only the columns of ``data`` that convert cleanly to int16.

    Each column is passed through ``strip(col, '"')`` and four ``replace``
    calls ('' / 'NA' / 'false' become '0', 'true' becomes '1') and then
    cast with ``astype(int16)``.  Columns that raise ``ValueError`` or
    ``OverflowError`` are dropped and their zero-based index is appended
    to ``skiprows``.

    NOTE(review): ``skiprows`` is not defined in this function; it is
    presumably a module-level list defined elsewhere - confirm.

    Returns ``array(...)`` built from the successfully converted columns.
    """
    substitutions = (('', '0'), ('NA', '0'), ('false', '0'), ('true', '1'))
    conv = []
    failed = []
    overflowed = 0
    for index, col in enumerate(data):
        colnr = index + 1  # one-based, used for progress output only
        if colnr % 100 == 0:
            print('converting column {0:d}...'.format(colnr))
        col = strip(col, '"')
        for old, new in substitutions:
            col = replace(col, old, new)
        try:
            irow = col.astype(int16)
        except ValueError as err:
            skiprows.append(index)
            # Keep the text after the first colon; if the message has no
            # colon, [-1] falls back to the whole message instead of
            # raising IndexError as [1] would.
            failed.append(str(err).split(':', 1)[-1])
        except OverflowError as err:
            print(str(err))
            skiprows.append(index)
            overflowed += 1
        else:
            conv.append(irow)
        del col
        gc.collect()  # free memory
    print('failed for (excluding overflows): "{0:s}"'.format(
        '", "'.join(failed)))
    # Overflowing columns are dropped too, so they count as removed.
    print('{0:d} columns removed'.format(len(failed) + overflowed))
    return array(conv)
Beispiel #7
0
def to_ints_only(data):
	"""Keep only the columns of ``data`` that convert cleanly to int16.

	Each column is passed through ``strip(col, '"')`` and four ``replace``
	calls ('' / 'NA' / 'false' become '0', 'true' becomes '1') and then
	cast with ``astype(int16)``.  Columns that raise ``ValueError`` or
	``OverflowError`` are dropped and their zero-based index is appended
	to ``skiprows``.

	NOTE(review): ``skiprows`` is not defined in this function; it is
	presumably a module-level list defined elsewhere - confirm.

	Returns ``array(...)`` built from the successfully converted columns.
	"""
	substitutions = (('', '0'), ('NA', '0'), ('false', '0'), ('true', '1'))
	conv = []
	failed = []
	overflowed = 0
	for index, col in enumerate(data):
		colnr = index + 1  # one-based, used for progress output only
		if colnr % 100 == 0:
			print('converting column {0:d}...'.format(colnr))
		col = strip(col, '"')
		for old, new in substitutions:
			col = replace(col, old, new)
		try:
			irow = col.astype(int16)
		except ValueError as err:
			skiprows.append(index)
			# Keep the text after the first colon; if the message has no
			# colon, [-1] falls back to the whole message instead of
			# raising IndexError as [1] would.
			failed.append(str(err).split(':', 1)[-1])
		except OverflowError as err:
			print(str(err))
			skiprows.append(index)
			overflowed += 1
		else:
			conv.append(irow)
		del col
		gc.collect()  # free memory
	print('failed for (excluding overflows): "{0:s}"'.format('", "'.join(failed)))
	# Overflowing columns are dropped too, so they count as removed.
	print('{0:d} columns removed'.format(len(failed) + overflowed))
	return array(conv)
Beispiel #8
0
    infile = 'suchyta-papers-2.bib'
    selectedfile = 'suchyta-papers-selected-modified.bib'
    additionalfile = 'suchyta-papers-additional-modified.bib'

    with open(infile) as f:
        bib = bibtexparser.load(f)
    with open(infile) as f:
        selected = bibtexparser.load(f)
    with open(infile) as f:
        additional = bibtexparser.load(f)

    spos = 0
    apos = 0
    for j in range(len(bib.entries)):
        authors = npchar.strip(bib.entries[j]['author'].split(' and'))
        select = True
        omit = False

        if len(authors) > num:
            authors = authors[0:num]
            last = 'et~al.'
            if me not in authors:
                last = '%s (including %s)' % (last, ME)
                select = False
            if bib.entries[j][u'ID'] in [
                    '2012SPIE.8451E..12H', '2015AJ....150..150F',
                    '2016JPhCS.759a2095K', 'choi2016stream'
            ]:
                select = True
            authors = np.append(authors, last)
Beispiel #9
0
def strip(iterable, *args, **kwargs):
    from numpy.core.defchararray import strip
    for item in iterable:
        yield strip(item, *args, **kwargs)
    return width, height


def modis_url(time, extent, resolution):
    """Build the image request URL for a time, bounding box and resolution.

    Args:
        time: UTC time in ISO format, e.g. ``2020-02-19T00:00:00Z``.
        extent: ``[lower_latitude, left_longitude, higher_latitude,
            right_longitude]``, e.g.
            ``[51.46162974683544, -22.94768591772153,
            53.03698575949367, -20.952234968354432]``.
        resolution: pixel resolution in km/pixel; should be one of
            ``[0.03, 0.06, 0.125, 0.25, 0.5, 1, 5, 10]``.

    Returns:
        A ``(width, height, url)`` tuple, where width and height come from
        ``calculate_width_height(extent, resolution)``.
    """
    width, height = calculate_width_height(extent, resolution)
    # The URL template takes the extent as one comma-separated string.
    extent_csv = ','.join(str(value) for value in extent)
    url = URL.format(width, height, extent_csv, time)
    return width, height, url


# Dates listed here are not filtered at the moment: the check in the loop
# below is disabled, so every directory entry is processed.
invalid_dates = ['2002-01-28', '2002-05-11', '2002-05-12', '2005-10-31']
directory = '/Users/yaoxiaoyi/Desktop/HLD'
save_dir = '/Users/yaoxiaoyi/Desktop/HLD_Images'

# For every entry in `directory`: read the shapefile of the same name inside
# it, request an image for its bounding box and save it as a JPEG.
for filename in os.listdir(directory):
    shpfile = f'{directory}/{filename}/{filename}'
    sh = shapefile.Reader(shpfile)

    # Characters 19..28 of the entry name are used as the date.
    date = filename[19:29]
    time = f'{date}T00:00:00Z'

    # modis_url expects the bbox entries in the order [1], [0], [3], [2].
    bbox = sh.bbox
    extent = [bbox[1], bbox[0], bbox[3], bbox[2]]
    width, height, url = modis_url(time, extent, .25)

    response = requests.get(strip(url))
    img = BytesIO(response.content)
    im = Image.open(img)
    im.save(f'{save_dir}/{filename}.jpg')
Beispiel #11
0
def predictSentiment():
    """Render the demo page, scoring the submitted tweet text on POST.

    On a non-POST request the bare ``demo.html`` template is rendered.
    On POST the ``tweetText`` form field is scored four ways:

    * ``googleSentiment`` (result exposes ``score`` and ``magnitude``),
    * ``vaderSentiment`` (mapping with ``pos``/``neu``/``neg``/``compound``),
    * ``textBlobSentiment`` (result exposes ``polarity``/``subjectivity``),
    * the pickled models at ``model_file_name`` / ``tfidf_model_file_name``
      (three classifiers, indexed 0..2, used for positive / negative /
      neutral).

    Text that strips to an empty string, or any exception, renders the
    template with an empty prediction.  Exceptions are printed, not raised.
    """
    if request.method != 'POST':
        return render_template('demo.html')

    try:
        # Reading the inputs given by the user
        text = request.form['tweetText']

        if strip(text) != "":

            # Google sentiment: score scaled to a percentage, magnitude as is.
            gScore = googleSentiment(text)
            gSentiment = {"Score": float("{:.2f}".format((gScore.score) * 100)),
                          "Magnitude": float("{:.2f}".format(gScore.magnitude))}

            if gScore.score > 0:
                gSentimentT = "Positive"
            elif gScore.score < 0:
                gSentimentT = "Negative"
            else:
                gSentimentT = "Netural"

            # VADER sentiment: every component scaled to a percentage.
            sVeder = vaderSentiment(text)
            print(sVeder)
            vSentiment = {"Positive": float("{:.2f}".format((sVeder['pos']) * 100)),
                          "Netural": float("{:.2f}".format((sVeder['neu']) * 100)),
                          "Negative": float("{:.2f}".format((sVeder['neg']) * 100)),
                          "Compound": float("{:.2f}".format((sVeder['compound']) * 100))
                          }

            # The compound score is labelled with a +/-0.05 dead band.
            if sVeder['compound'] >= 0.05:
                vSentimenT = "Positive"
            elif sVeder['compound'] <= - 0.05:
                vSentimenT = "Negative"
            else:
                vSentimenT = "Netural"

            # TextBlob sentiment
            tScore = textBlobSentiment(text)
            tSentiment = {"Polarity": float("{:.2f}".format((tScore.polarity) * 100)),
                          "Subjectivity": float("{:.2f}".format((tScore.subjectivity) * 100))}

            if tScore.polarity > 0:
                tSentimentT = "Positive"
            elif tScore.polarity < 0:
                tSentimentT = "Negative"
            else:
                tSentimentT = "Netural"

            # Load the model files; the context managers close both handles
            # (they were previously left open on every request).
            # NOTE(security): pickle.load executes arbitrary code from the
            # file - these paths must only ever point at trusted files.
            with open(model_file_name, 'rb') as model_fh:
                loaded_model = pickle.load(model_fh)
            with open(tfidf_model_file_name, "rb") as tfidf_fh:
                tfidf_model = pickle.load(tfidf_fh)

            # Transform input value
            pred_data = tfidf_model.transform([text])

            # Predict sentiment - 3 models for 3 sentiments
            positive = (float("{:.1f}".format(loaded_model[0].predict_proba(pred_data)[0][1])) * 100)
            negative = (float("{:.1f}".format(loaded_model[1].predict_proba(pred_data)[0][1])) * 100)
            neutral = (float("{:.1f}".format(loaded_model[2].predict_proba(pred_data)[0][1])) * 100)

            if positive > 50:
                sentText = "Positive"
            elif negative > 50:
                sentText = "Negative"
            else:
                sentText = "Neutral"

            # Build a dictionary to return values
            prediction = {"Positive": positive, "Negative": negative, "Neutral": neutral}

            # Print prediction
            print('prediction is', prediction)

            # Show the prediction results in a UI
            return render_template('demo.html', prediction=prediction,
                                   gSentiment=gSentiment, vSentiment=vSentiment, tSentiment=tSentiment,
                                   gSentimentT=gSentimentT, vSentimentT=vSentimenT, tSentimentT=tSentimentT,
                                   sText=text, sentText=sentText)
        else:
            return render_template('demo.html', prediction='', sText='')

    except Exception as e:
        # Best-effort handler: report the problem and show an empty result.
        print('The Exception message is: ', e)
        return render_template('demo.html', prediction='')
Beispiel #12
0
      pagede = requests.get("https://www.@@@@@@@@@@@@@@@.it/ricette-cat/page" + str(page) + "/" + str(porta) + "/" + str(theme))

      soup = BeautifulSoup(pagede.text, 'html.parser')
      ricette_div = soup.find_all("div", class_="gz-content-recipe-horizontal")

      for ricetta_soup in ricette_div:

        name = ricetta_soup.h2.a.text
        ricette.append(name)

        descrizione = ricetta_soup.find('div', class_='gz-description').text
        descrizioni.append(descrizione)

        difficolta = ricetta_soup.find('div', class_="gz-col-flex gz-double gz-mTop10").text[25:37]
        difficolta_list.append(strip(difficolta))

        portata_list.append(porta)
        thematics_list.append(theme)

      #sleep(randint(2,7))
  print("nuova portata...", datetime.datetime.now())

# Assemble the collected lists into one table.  Note that `ricette` is
# rebound here from the list of names to the resulting DataFrame.
ricette = pd.DataFrame(
    {
        'Ricetta': ricette,
        'Descrizione': descrizioni,
        'Difficoltà': difficolta_list,
        'Portata': portata_list,
        'Tipo': thematics_list,
    }
)

# Preview of the first rows; the result is discarded here.
ricette.head()

# Write the table to `path` as comma-separated UTF-16 text.
ricette.to_csv(path, sep=",", encoding="utf-16")
def receiveUserInput():
    """Ask the user for a character in a modal dialog and return it stripped.

    A hidden Tk root is created for the dialog and destroyed afterwards, so
    repeated calls no longer accumulate root windows.

    Returns:
        ``strip(str(answer))``.  When the dialog is cancelled ``askstring``
        returns ``None``, so the literal text ``"None"`` is passed to
        ``strip`` (behaviour unchanged from before).
    """
    root = Tk()
    root.withdraw()  # keep the empty root window off screen
    try:
        answer = simpledialog.askstring(title="Input Dialog",
                                        prompt="Insert a character:\n Empty character = #",
                                        parent=root)
    finally:
        # Release the root even if the dialog raises.
        root.destroy()
    return strip(str(answer))
Beispiel #14
0
def work(in_train_arch,
         in_test_arch,
         in_train_csv,
         in_test_csv,
         out_h5):
    """Preprocess the train/test CSV tables and hand the results to ``h5new``.

    The train table is read from ``in_train_csv`` inside ``in_train_arch``
    and the test table from ``in_test_csv`` inside ``in_test_arch`` (both
    via ``unzip`` and ``genfromtxt``).  Empty cells become ``'nan'``.  The
    columns in ``missing_cidx`` get their NaN entries replaced by the
    column mean, and the columns in ``nominal_cidx`` are run through label
    encoders.  The resulting ``train_X``, ``train_y``, ``test_X``,
    ``train_labels`` and ``test_labels`` are passed to the ``h5new`` pipe
    opened on ``out_h5``.

    NOTE(review): ``P`` is not imported in this function; presumably a
    module-level import of the pipe library - confirm.  The pipe helpers
    come from project modules (``pypipes``, ``nppipes``, ``h5pipes``) whose
    exact semantics are not visible here.

    Returns:
        None.
    """

    from pypipes import unzip,as_key,del_key,getitem,setitem
    from nppipes import (genfromtxt,
                         place,astype,as_columns,label_encoder,fit_transform,
                         transform,stack
                         )
    from nppipes import take as np_take
    from numpy.core.defchararray import strip
    from numpy import s_,mean,in1d,putmask
    from collections import Counter
    from h5pipes import h5new


    @P.Pipe
    def replace_missing_with(iterable, ftor):
        """Fill NaN entries of each column with ``ftor`` of its other entries.

        For every item, and every column index ``i`` of that item, entries
        where ``isnan`` is true are overwritten with ``ftor`` applied to
        the non-NaN entries of the same column.  Items are modified in
        place and then yielded.
        """
        from numpy import isnan
        for item in iterable:
            for i in range(item.shape[1]):
                mask = isnan(item[:, i])
                value = ftor(item[~mask, i])
                item[mask, i] = value
                pass
            yield item


    # Column indices (into the feature matrix X) that get NaN filling below.
    missing_cidx = [11, 14, 16, 28, 33, 34, 35, 36, 37, 46, 51, 60, 68]
    # Columns that are label-encoded.  The "unseen" ones get an extra
    # correction step for test values absent from the encoder's classes_.
    unseen_nominal_cidx = [2, 12, 38, 69, 74]
    seen_nominal_cidx = [0, 1, 4, 5, 6, 13, 15, 17, 18, 19, 20, 21, 22, 23,
                 24, 25, 26, 27, 29, 30, 31, 32, 39, 40, 41, 42, 43, 44, 45,
                 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59,
                 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 75, 76, 77]
    # Order matters: the encoders for the unseen columns are the LAST
    # len(unseen_nominal_cidx) entries of 'label_encoders' below.
    nominal_cidx = seen_nominal_cidx + unseen_nominal_cidx


    data = (
        in_train_arch
        | unzip(in_train_csv)
        | genfromtxt(delimiter=',', dtype=str)
        | place(lambda d: d == '', 'nan')
        # Train table: row 0 holds column names, column 0 the labels and
        # the last column the target y.
        | as_key('train')
        | as_key('train_col_names', lambda d: strip(d['train'][0], '"'))
        | as_key('train_labels',    lambda d: d['train'][1:, 0].astype(int))
        | as_key('train_X',         lambda d: d['train'][1:, 1:-1])
        | as_key('train_y',         lambda d: d['train'][1:, -1].astype(int))
        | del_key('train')


        # Test table: same layout, but without a trailing target column.
        | as_key('test', lambda d:
                in_test_arch
                | unzip(in_test_csv)
                | genfromtxt(delimiter=',', dtype=str)
                | place(lambda d: d == '', 'nan')
                | P.first
                )
        | as_key('test_col_names', lambda d: strip(d['test'][0], '"'))
        | as_key('test_labels',    lambda d: d['test'][1:, 0].astype(int))
        | as_key('test_X',         lambda d: d['test'][1:, 1:])
        | del_key('test')

        # Fill NaNs in the training missing_cidx columns with the column
        # mean, then write them back (as strings) into a copy of train_X.
        | as_key('train_X', lambda d:
                (d['train_X'],)
                | np_take(missing_cidx, axis=1)
                | astype(float)

                | replace_missing_with(mean)

                | astype(str)
                | setitem(d['train_X'].copy(), s_[:, missing_cidx])
                | P.first
                )

        # One label encoder per nominal column.
        | as_key('label_encoders', lambda d:
                len(nominal_cidx)
                | label_encoder
                | P.as_tuple
                )

        # Fit the encoders on the training nominal columns and encode them.
        | as_key('train_X', lambda d:
                (d['train_X'],)
                | np_take(nominal_cidx, axis=1)
                | as_columns
                | fit_transform(d['label_encoders'])
                | stack(axis=1)
                | setitem(d['train_X'].copy(), s_[:, nominal_cidx])
                | P.first
                )

        # Encode the test "seen" nominal columns with the fitted encoders
        # (all encoders except the trailing ones for the unseen columns).
        | as_key('test_X', lambda d:
                (d['test_X'],)
                | np_take(seen_nominal_cidx, axis=1)
                | as_columns
                | transform(d['label_encoders'][:-len(unseen_nominal_cidx)])
                | stack(axis=1)
                | setitem(d['test_X'].copy(), s_[:, seen_nominal_cidx])
                | P.first
                )

        # Test "unseen" nominal columns: replace values the encoders were
        # not fitted on before encoding.
        | as_key('test_X', lambda d:
                (d['test_X'],)
                | np_take(unseen_nominal_cidx, axis=1)
                | as_key('test_unseen_nominals_features')

                # Per column: test values missing from the encoder's classes_.
                | as_key('test_unseen_nominals', lambda d2:
                        zip(d2['test_unseen_nominals_features'].T,
                            d['label_encoders'][-len(unseen_nominal_cidx):])
                        | P.select(lambda t: list(set(t[0]) - set(t[1].classes_)))
                        | P.as_list
                        )

                # Per column: most frequent (decoded) training value.
                | as_key('train_most_common_nominals', lambda d2:
                        zip(d['train_X'][:, unseen_nominal_cidx].T.astype(int),
                            d['label_encoders'][-len(unseen_nominal_cidx):])
                        | P.select(lambda t: t[1].inverse_transform(t[0]))
                        | P.select(lambda s: Counter(s).most_common(1)[0][0])
                        | P.as_list
                        )

                # putmask works in place and returns None, so `or t[0].T`
                # supplies the modified column as the lambda's result.
                | as_key('test_corrected_features', lambda d2:
                        zip(d2['test_unseen_nominals_features'].copy().T,
                            d2['test_unseen_nominals'],
                            d2['train_most_common_nominals'])
                        | P.select(lambda t: putmask(t[0], in1d(t[0], t[1]), t[2]) or t[0].T)
                        | stack(axis=1)
                        | P.first
                        )

                | getitem('test_corrected_features')
                | as_columns
                | transform(d['label_encoders'][-len(unseen_nominal_cidx):])
                | stack(axis=1)
                | setitem(d['test_X'].copy(), s_[:, unseen_nominal_cidx])
                | P.first
                )

        | del_key('label_encoders')

        # Fill NaNs in the test missing_cidx columns with the mean of the
        # test column itself (not the training mean).
        | as_key('test_X', lambda d:
                (d['test_X'],)
                | np_take(missing_cidx, axis=1)
                | astype(float)

                | replace_missing_with(mean)

                | astype(str)
                | setitem(d['test_X'].copy(), s_[:, missing_cidx])
                | P.first
                )

        | P.first
        )

    #print(data.keys())

    # Hand the prepared arrays to the h5new pipe opened on out_h5.
    (
        (out_h5,)
        | h5new
        | as_key('train_X',         lambda _: data['train_X'].astype(float))
        | as_key('train_y',         lambda _: data['train_y'].astype(float))
        | as_key('test_X',          lambda _: data['test_X'].astype(float))
        | as_key('train_labels',    lambda _: data['train_labels'])
        | as_key('test_labels',     lambda _: data['test_labels'])
        | P.first
    )

    return