def render_GET(self, request):
    """Render the service list as XML.

    No authorization check is performed: everyone is allowed to fetch
    the service list.  Returns the serialized service tree, or a 400
    response when the client did not send a usable Host header.
    """
    host = get_headers(request, 'host')
    if not host:
        log.msg('Client did not send proper host header.', system='sgas.TopResource')
        # Fix for the original FIXME: a bare `return` made twisted emit a
        # 500; answer with an explicit 400 Bad Request instead.
        request.setResponseCode(400)
        return 'Missing or invalid host header.\n'

    # stuff needed for being cooperative with a reverse proxy
    # note: once loggers get updated to understand path referral
    # (currently they only understand complete URLs) these hacks
    # can be removed - this will probably be in the beginning of 2011 :-)
    if has_headers(request, 'x-forwarded-port'):
        host += ':' + get_headers(request, 'x-forwarded-port')
    is_secure = request.isSecure()
    if get_headers(request, 'x-forwarded-protocol') == 'https':
        is_secure = True

    basepath = '/'.join(request.prepath)
    baseurl = self._createBaseURL(host, is_secure, basepath)

    tree = self._createServiceTree(baseurl)
    ts = XML_HEADER + ET.tostring(tree) + "\n"
    return ts
def editor():
    """Flask view: extract column headers from a tabular *source* and render the editor.

    Query-string parameters:
        source   -- URL (or local path when DOWNLOAD is off) of the input file
        format   -- file type, defaults to "csv"
        callback -- optional callback URL passed through to the template
    """
    source = request.args.get('source', default=None)
    file_type = request.args.get('format', default="csv")
    callback_url = request.args.get('callback', default="")
    error_msg = None
    warning_msg = None
    original_file_name = ""
    if source is None or source.strip() == '':
        # No source supplied: show the editor with demo headers.
        headers = ["AAA", "BBB", "CCC"]
    else:
        # Bug fix: the original split a possibly-None `source` before the
        # emptiness check above, raising AttributeError for missing sources.
        original_file_name = source.split('/')[-1]
        if DOWNLOAD:
            r = requests.get(source, allow_redirects=True)
            if r.status_code == 200:
                # randomized local name avoids collisions between uploads
                fname = (source.split('/')[-1].split('.')[0] + "-" +
                         util.get_random_string(4) + "." + source.split('.')[-1])
                uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
                # r.content is bytes, so write in binary mode; the context
                # manager also fixes the previously-unclosed file handle
                with open(uploaded_file_dir, 'wb') as f:
                    f.write(r.content)
                headers = util.get_headers(uploaded_file_dir, file_type=file_type)
                if headers == []:
                    warning_msg = "Can't parse the source file %s" % source
            else:
                error_msg = "the source %s can not be accessed" % source
                print(error_msg)
                headers = []
        else:
            headers = util.get_headers(source, file_type=file_type)
            if headers == []:
                warning_msg = "Can't parse the source file %s" % source

    # context manager fixes the previously-unclosed labels file
    with open(os.path.join(DATA_DIR, "labels.txt")) as f:
        labels_txt = f.read()
    return render_template('editor.html', labels_txt=labels_txt,
                           headers=headers, callback=callback_url,
                           file_name=original_file_name,
                           error_msg=error_msg, warning_msg=warning_msg)
def import_data(f, record_id, target_id, id=False):
    '''Import the data in *f* and convert it to X (input) and y (output) vectors.

    f         -- input accepted by read_csv
    record_id -- name of the instance-ID column (must be present in the header)
    target_id -- name of the target column (must be present in the header)
    id        -- when True, keep the ID column in the records
    Returns (X, y, headers), or (False, False, False) when a column is missing.
    '''
    rows = read_csv(f)

    # save column names as headers, validate record and target IDs exist
    headers = util.get_headers(next(rows))  # next() works on py2 and py3 iterators
    try:
        record_col = headers.index(record_id)
        target_col = headers.index(target_id)
    except ValueError:
        # list.index raises ValueError for a missing column name
        # (was a bare except, which also hid unrelated failures)
        print('The specified instance ID was not found as column name. Manually check input file for correct instance ID column.')
        return False, False, False

    # save and split records
    print(' ...(loading)')
    if not id:
        records = [row[1:] for row in rows]  # drop the ID column
        headers = headers[1:-1]
    else:
        records = [row for row in rows]
        headers = headers[0:-1]

    print(' ...(converting to matrix)')
    records = np.matrix(records)
    X = records[:, 0:-1]  # features

    # output
    y = records[:, -1]  # target
    # plain float: the np.float alias was removed in NumPy >= 1.24
    y = np.squeeze(np.asarray(y.astype(float)))

    print(' ...(converting data type)')
    X = X.astype(np.float64, copy=False)
    y = y.astype(np.float64, copy=False)

    return X, y, headers
def import_data(f, record_id, target_id):
    '''Import the data in *f* and convert it to X (input) and y (output) vectors.

    f         -- input accepted by read_csv
    record_id -- name of the instance-ID column (must be present in the header)
    target_id -- name of the target column (must be present in the header)
    Returns (X, y, headers), or (False, False, False) when a column is missing.
    '''
    rows = read_csv(f)

    # save column names as headers, validate record and target IDs exist
    headers = util.get_headers(next(rows))  # next() works on py2 and py3 iterators
    try:
        record_col = headers.index(record_id)
        target_col = headers.index(target_id)
    except ValueError:
        # list.index raises ValueError for a missing column name
        # (was a bare except, which also hid unrelated failures)
        print('The specified instance ID was not found as column name. Manually check input file for correct instance ID column.')
        return False, False, False

    # save and split records
    print(' ...(loading)')
    records = [row[1:] for row in rows]  # drop the ID column

    print(' ...(converting to matrix)')
    records = np.matrix(records)
    X = records[:, 0:-1]  # features
    headers = headers[1:-1]

    # output
    y = records[:, -1]  # target
    # plain int: the np.int alias was removed in NumPy >= 1.24;
    # the int cast truncates targets to class labels before the float cast
    y = np.squeeze(np.asarray(y.astype(int)))

    print(' ...(converting data type)')
    X = X.astype(np.float64, copy=False)
    y = y.astype(np.float64, copy=False)

    return X, y, headers
def getSubject(request):
    """
    Utility method for extracting the subject name from a
    twisted.web.http.Request
    """
    # Trust the forwarded identity only when the request arrives from a
    # reverse proxy on a loopback address.
    from_loopback = request.getClientIP() in LOOPBACK_ADDRESSES
    if from_loopback and has_headers(request, X_SSL_SUBJECT):
        return get_headers(request, X_SSL_SUBJECT)

    # request wasn't secure or no certificate was presented
    return None
def getSubject(request):
    """
    Utility method for extracting the subject name from a
    twisted.web.http.Request
    """
    # Only a reverse proxy on a loopback address may forward an identity.
    if request.getClientIP() not in LOOPBACK_ADDRESSES:
        return None
    if not has_headers(request, X_SSL_SUBJECT):
        # request wasn't secure or no certificate was presented
        return None
    return get_headers(request, X_SSL_SUBJECT)
def main(couch_params, main_db, avatar_db, master_params, wikifeat_home):
    """Initialize the wikifeat CouchDB databases and (optionally) the master user.

    couch_params   -- object with adminuser/adminpass/use_ssl/host/port attributes
    main_db        -- name of the main database to set up
    avatar_db      -- name of the avatar database to set up
    master_params  -- object with a skip_master flag plus master-user settings
    wikifeat_home  -- wikifeat installation directory, stored in the wf_dir global
    """
    # Set up credentials
    credentials = util.get_credentials(couch_params.adminuser, couch_params.adminpass)
    # gh/ph are module-level GET/PUT header dicts reused by the setup helpers,
    # so they must be populated before any of the setup_* calls below.
    global gh, ph
    gh = util.get_headers(credentials)
    ph = util.put_headers(credentials)
    global wf_dir
    wf_dir = wikifeat_home
    # Establish a connection to couchdb
    conn = util.get_connection(couch_params.use_ssl, couch_params.host, couch_params.port)
    conn.connect()
    setup_main_db(conn, main_db)
    setup_user_db(conn)
    setup_avatar_db(conn, avatar_db)
    # creation of the master user can be skipped explicitly
    if master_params.skip_master is False:
        create_master_user(conn, master_params)
def getHostname(request):
    """
    Utility method for getting hostname of client.

    Returns the reverse-looked-up hostname (or raw forwarded value) when
    the request came through a loopback reverse proxy; otherwise None.
    """
    if request.getClientIP() in LOOPBACK_ADDRESSES and has_headers(request, X_FORWARDED_FOR):
        # nginx typically returns ip addresses
        addr = get_headers(request, X_FORWARDED_FOR)
        if isIPAddress(addr):
            # we really shouldn't do such blocking calls in twisted,
            # but the twisted dns interface is rather terrible and
            # odd things happen when using it
            # Set timeout to 1 second to limit the possible damage
            try:
                socket.setdefaulttimeout(1)
                info = socket.gethostbyaddr(addr)
                return info[0]
            except socket.error as msg:  # "as" form works on py2.6+ and py3
                log.msg("Error performing reverse lookup: %s" % msg)
                return addr
        else:
            # bug fix: the original `else: addr` was a no-op expression and
            # fell through returning None instead of the forwarded value
            return addr
def getHostname(request):
    """
    Utility method for getting hostname of client.

    Returns the reverse-looked-up hostname (or raw forwarded value) when
    the request came through a loopback reverse proxy; otherwise None.
    """
    if request.getClientIP() in LOOPBACK_ADDRESSES and has_headers(
            request, X_FORWARDED_FOR):
        # nginx typically returns ip addresses
        addr = get_headers(request, X_FORWARDED_FOR)
        if isIPAddress(addr):
            # we really shouldn't do such blocking calls in twisted,
            # but the twisted dns interface is rather terrible and
            # odd things happen when using it
            # Set timeout to 1 second to limit the possible damage
            try:
                socket.setdefaulttimeout(1)
                info = socket.gethostbyaddr(addr)
                return info[0]
            except socket.error as msg:  # "as" form works on py2.6+ and py3
                log.msg("Error performing reverse lookup: %s" % msg)
                return addr
        else:
            # bug fix: the original `else: addr` was a no-op expression and
            # fell through returning None instead of the forwarded value
            return addr
def insert_artist_genres():
    """Fetch genres for every stored artist from Spotify and insert them into RDS."""
    conn, cursor = util.connect2RDS()
    headers = util.get_headers(client_id, client_secret)

    cursor.execute("SELECT id FROM artists")
    artist_ids = [row[0] for row in cursor.fetchall()]

    # the several-artists endpoint accepts at most 50 ids per request
    batches = [artist_ids[i:i + 50] for i in range(0, len(artist_ids), 50)]

    genre_rows = []
    for batch in batches:
        url = "https://api.spotify.com/v1/artists/?ids={}".format(','.join(batch))
        response = requests.get(url, headers=headers)
        payload = json.loads(response.text)
        for artist in payload['artists']:
            genre_rows.extend(
                {'artist_id': artist['id'], 'genre': genre}
                for genre in artist['genres'])

    for row in genre_rows:
        util.insert_row(cursor, row, 'artist_genres')

    conn.commit()
    cursor.close()
    sys.exit(0)
def insert_artists():
    """Look up each artist named in artist_list.csv on Spotify and insert exact matches into RDS."""
    conn, cursor = util.connect2RDS()
    headers = util.get_headers()

    artists = []
    with open('artist_list.csv') as f:
        raw = csv.reader(f)
        for row in raw:
            artists.append(row[0])

    for a in artists:
        params = {"q": a, "type": "artist", "limit": "1"}
        r = requests.get("https://api.spotify.com/v1/search",
                         params=params,
                         headers=headers)
        raw = json.loads(r.text)
        artist = {}
        try:
            artist_raw = raw['artists']['items'][0]
            # only store exact name matches
            if artist_raw['name'] == params['q']:
                artist.update({
                    'id': artist_raw['id'],
                    'name': artist_raw['name'],
                    'followers': artist_raw['followers']['total'],
                    'popularity': artist_raw['popularity'],
                    'url': artist_raw['external_urls']['spotify'],
                    'image_url': artist_raw['images'][0]['url']
                })
                util.insert_row(cursor, artist, 'artists')
        except (KeyError, IndexError):
            # empty search result or unexpected payload shape; log the
            # traceback and move on (fixes the bare `except:` and the
            # uninformative, typo'd "something worng" message)
            logging.exception('failed to insert artist %s', a)
            continue

    conn.commit()
    sys.exit(0)
def editor():
    """Flask view: accept a tabular source (upload or URL), extract its headers
    and render the mapping editor.

    Form fields: format (default "csv"), callback (optional), ontologies
    (at least one required), and either a non-empty `source` URL or an
    uploaded `sourcefile`.
    """
    file_type = request.form.get('format', 'csv')
    callback_url = request.form.get('callback', "")

    ontologies = request.form.getlist('ontologies')
    if len(ontologies) == 0:
        return render_template('msg.html', msg="You should select at least one ontology", msg_title="Error")
    print("number of ontologies: " + str(len(ontologies)))
    print(ontologies)
    print(request.form)
    error_msg = None
    warning_msg = None
    uploaded = False
    if 'source' not in request.form or request.form['source'].strip() == "":
        if 'sourcefile' in request.files:
            sourcefile = request.files['sourcefile']
            if sourcefile.filename != "":
                original_file_name = sourcefile.filename
                filename = secure_filename(sourcefile.filename)
                fname = util.get_random_string(4) + "-" + filename
                uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
                if not os.path.exists(UPLOAD_DIR):
                    os.makedirs(UPLOAD_DIR)
                sourcefile.save(uploaded_file_dir)
                uploaded = True
            else:
                print("blank source file")
        else:
            print('not sourcefile')
        if not uploaded:
            return render_template('msg.html', msg="Expecting an input file", msg_title="Error")
    else:
        source = request.form['source']
        original_file_name = source.split('/')[-1]
        filename = secure_filename(original_file_name)
        r = requests.get(source, allow_redirects=True)
        if r.status_code == 200:
            fname = util.get_random_string(4) + "-" + filename
            uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
            # bug fix: the original only created UPLOAD_DIR in the upload
            # branch, so a download into a missing directory failed
            if not os.path.exists(UPLOAD_DIR):
                os.makedirs(UPLOAD_DIR)
            # bug fix: r.content is bytes, but the original opened the file
            # in text mode ('w'), which raises TypeError on Python 3; the
            # context manager also closes the previously-leaked handle
            with open(uploaded_file_dir, 'wb') as f:
                f.write(r.content)
        else:
            error_msg = "the source %s can not be accessed" % source
            print(error_msg)
            return render_template('msg.html', msg=error_msg, msg_title="Error")

    headers = util.get_headers(uploaded_file_dir, file_type=file_type)
    if headers == []:
        error_msg = "Can't parse the source file "
        return render_template('msg.html', msg=error_msg, msg_title="Error")

    labels = util.get_classes_as_txt(ontologies, data_dir=DATA_DIR)
    return render_template('editor.html', labels_txt=labels,
                           ontologies_txt=",".join(ontologies),
                           headers=headers, callback=callback_url,
                           file_name=fname, error_msg=error_msg,
                           warning_msg=warning_msg)
def headers(conf_path):
    """Return the headers configured at *conf_path*, with the two extra report columns appended."""
    extra_columns = ["issued eai", "currency seat ordinal"]
    return get_headers(conf_path) + extra_columns
def toS3():
    """Fetch top tracks and their audio features for every stored artist from
    Spotify, write them as parquet and upload both files to S3, partitioned
    by the current UTC date."""
    conn, cursor = util.connect2RDS()
    headers = util.get_headers()
    cursor.execute("SELECT id FROM artists")

    # output column -> jsonpath expression inside a Spotify track object
    top_track_keys = {
        'id': 'id',
        'name': 'name',
        'popularity': 'popularity',
        'external_urls': 'external_urls.spotify'
    }
    top_tracks = []
    for (id, ) in cursor.fetchall():
        URL = "https://api.spotify.com/v1/artists/{}/top-tracks".format(id)
        params = {'country': 'US'}
        r = requests.get(URL, params=params, headers=headers)
        raw = json.loads(r.text)
        for i in raw['tracks']:
            top_track = {}
            for k, v in top_track_keys.items():
                top_track.update({k: jsonpath.jsonpath(i, v)[0]})
            top_track.update({'artist_id': id})
            top_tracks.append(top_track)

    top_tracks = pd.DataFrame(top_tracks)
    top_tracks.to_parquet('top-tracks.parquet', engine='pyarrow', compression='snappy')
    track_ids = [top_tracks.loc[i, 'id'] for i in top_tracks.index]

    dt = datetime.datetime.utcnow().strftime("%Y-%m-%d")
    s3 = boto3.resource('s3')
    object = s3.Object('kihong-spotify-lambda', 'top-tracks/dt={}/top-tracks.parquet'.format(dt))
    # bug fix: the original opened the parquet file and never closed it
    with open('top-tracks.parquet', 'rb') as data:
        object.put(Body=data)

    # the audio-features endpoint accepts at most 100 track ids per request
    tracks_batch = [track_ids[i:i + 100] for i in range(0, len(track_ids), 100)]
    audio_features = []
    for i in tracks_batch:
        ids = ','.join(i)
        URL = "https://api.spotify.com/v1/audio-features/?ids={}".format(ids)
        r = requests.get(URL, headers=headers)
        raw = json.loads(r.text)
        audio_features.extend(raw['audio_features'])

    audio_features = pd.DataFrame(audio_features)
    audio_features.to_parquet('audio-features.parquet', engine='pyarrow', compression='snappy')
    object = s3.Object('kihong-spotify-lambda', 'audio-features/dt={}/audio-features.parquet'.format(dt))
    # bug fix: close the second upload handle as well
    with open('audio-features.parquet', 'rb') as data:
        object.put(Body=data)
def editor():
    """Flask view: accept a tabular source (upload or URL), extract its headers,
    gather ontology class labels and render the mapping editor.

    Form fields: format (default "csv"), callback (optional), kg (optional
    knowledge-graph name), ontologies (at least one required), and either a
    non-empty `source` URL or an uploaded `sourcefile`.
    """
    file_type = request.form.get('format', 'csv')
    callback_url = request.form.get('callback', "")
    kg = None
    if 'kg' in request.form and request.form['kg'].strip() != "":
        kg = request.form['kg'].strip()

    ontologies = request.form.getlist('ontologies')
    if len(ontologies) == 0:
        return render_template(
            'msg.html',
            msg="You should select at least one ontology",
            msg_title="Error")
    logger.debug("number of ontologies: " + str(len(ontologies)))
    logger.debug(str(ontologies))
    logger.debug(str(request.form))
    error_msg = None
    warning_msg = None
    uploaded = False
    if 'source' not in request.form or request.form['source'].strip() == "":
        if 'sourcefile' in request.files:
            sourcefile = request.files['sourcefile']
            if sourcefile.filename != "":
                original_file_name = sourcefile.filename
                filename = secure_filename(sourcefile.filename)
                fname = util.get_random_string(4) + "-" + filename
                uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
                if not os.path.exists(UPLOAD_DIR):
                    os.makedirs(UPLOAD_DIR)
                sourcefile.save(uploaded_file_dir)
                uploaded = True
            else:
                logger.debug("blank source file")
        else:
            logger.debug('not sourcefile')
        if not uploaded:
            return render_template('msg.html',
                                   msg="Expecting an input file",
                                   msg_title="Error")
    else:
        source = request.form['source']
        original_file_name = source.split('/')[-1]
        filename = secure_filename(original_file_name)
        r = requests.get(source, allow_redirects=True)
        if r.status_code == 200:
            fname = util.get_random_string(4) + "-" + filename
            uploaded_file_dir = os.path.join(UPLOAD_DIR, fname)
            if not os.path.exists(UPLOAD_DIR):
                os.makedirs(UPLOAD_DIR)
            # context manager fixes the previously-unclosed file handle
            with open(uploaded_file_dir, 'w', encoding='utf-8') as f:
                f.write(r.text)
        else:
            error_msg = "the source %s can not be accessed" % source
            logger.debug(error_msg)
            return render_template('msg.html',
                                   msg=error_msg,
                                   msg_title="Error")

    headers = util.get_headers(uploaded_file_dir, file_type=file_type)
    if headers == []:
        error_msg = "Can't parse the source file "
        return render_template('msg.html', msg=error_msg, msg_title="Error")
    logger.debug("headers: ")
    logger.debug(str(headers))

    labels = ""
    for o in ontologies:
        o_labels = None
        try:
            o_labels = util.get_classes_as_txt([o], data_dir=DATA_DIR)
        except Exception:
            # fall back to the ontology directory when the ontology is not
            # available in the data directory (was a bare except, which
            # would also swallow KeyboardInterrupt/SystemExit)
            o_labels = util.get_classes_as_txt([o], data_dir=ONT_DIR)
        if o_labels:
            labels += o_labels
    logger.debug("labels: ")
    logger.debug(str(labels))
    return render_template('editor.html',
                           labels_txt=labels,
                           ontologies_txt=",".join(ontologies),
                           headers=headers,
                           kg=kg,
                           callback=callback_url,
                           file_name=fname,
                           error_msg=error_msg,
                           warning_msg=warning_msg)