Example #1
0
 def __init__(self, download_path, tmp_path, captcha_cb, delay_cb, message_cb, settings=None, settings_provider_cls=None, settings_provider_args=None, debug=False, providers=None):
     """
     Create a seeker instance for every subtitle provider.

     download_path / tmp_path -- target directory for finished subtitles and
         working directory for temporary files
     captcha_cb, delay_cb, message_cb -- UI callbacks handed to each seeker
     settings -- optional mapping provider_id -> provider settings; used only
         when settings_provider_cls is not given
     settings_provider_cls / settings_provider_args -- optional factory class
         (and its args) building a per-provider settings object; takes
         precedence over the plain `settings` mapping
     debug -- when true, enables debug-level logging
     providers -- optional list of seeker classes, defaults to SUBTITLES_SEEKERS
     """
     self.log = SimpleLogger(self.__class__.__name__, log_level=debug and SimpleLogger.LOG_DEBUG or SimpleLogger.LOG_INFO)
     self.download_path = toString(download_path)
     self.tmp_path = toString(tmp_path)
     self.seekers = []
     providers = providers or SUBTITLES_SEEKERS
     for seeker in providers:
         provider_id = seeker.id
         default_settings = seeker.default_settings
         # every provider gets a synthetic 'enabled' switch in front of its
         # own settings ('pos': -1 sorts it first)
         default_settings['enabled'] = {'type':'yesno', 'default':True, 'label':'Enabled', 'pos':-1}
         if settings_provider_cls is not None:
             # settings objects come from the supplied factory class; the
             # plain `settings` mapping is deliberately ignored in this mode
             settings = None
             settings_provider = settings_provider_cls(provider_id, default_settings, settings_provider_args)
             # a seeker whose 'error' attribute is set failed to initialize:
             # disable it and wrap it in ErrorSeeker instead of using it directly
             if hasattr(seeker, 'error') and seeker.error is not None:
                 settings_provider.setSetting('enabled', False)
                 self.seekers.append(ErrorSeeker(seeker, tmp_path, download_path, settings, settings_provider, captcha_cb, delay_cb, message_cb))
             else:
                 self.seekers.append(seeker(tmp_path, download_path, settings, settings_provider, captcha_cb, delay_cb, message_cb))
         elif settings is not None and provider_id in settings:
             # plain settings dict mode: this provider has its own sub-dict
             settings_provider = None
             if hasattr(seeker, 'error') and seeker.error is not None:
                 # NOTE(review): ErrorSeeker gets the whole `settings` mapping
                 # here while the working seeker below gets settings[provider_id]
                 # — looks inconsistent; confirm against ErrorSeeker's signature
                 self.seekers.append(ErrorSeeker(seeker, tmp_path, download_path, settings, settings_provider, captcha_cb, delay_cb, message_cb))
             else:
                 self.seekers.append(seeker(tmp_path, download_path, settings[provider_id], settings_provider, captcha_cb, delay_cb, message_cb))
         else:
             # no settings available for this provider at all
             settings = None
             settings_provider = None
             if hasattr(seeker, 'error') and seeker.error is not None:
                 self.seekers.append(ErrorSeeker(seeker, tmp_path, download_path, settings, settings_provider, captcha_cb, delay_cb, message_cb))
             else:
                 self.seekers.append(seeker(tmp_path, download_path, settings, settings_provider, captcha_cb, delay_cb, message_cb))
def find_params(model, feature_set, trainX, trainY, grid_search=False):
    """
    Return parameter set for the model, either predefined
    or found through grid search.
    """    
    model_name = model.__class__.__name__
    params = INITIAL_PARAMS.get(model_name, {})

    try:
        with open('saved_params.json') as f:
            saved_params = json.load(f)
    except IOError:
        saved_params = {}

    if (grid_search and model_name in PARAM_GRID and toString(
            model, feature_set) not in saved_params):

        clf = GridSearchCV(model, PARAM_GRID[model_name], cv=10, n_jobs=6,
                           scoring="roc_auc")
        
        clf.fit(trainX, trainY)
        print "found params (%s > %.4f): %s" % (toString(model, feature_set), clf.best_score_, clf.best_params_)
        params.update(clf.best_params_)
        saved_params[toString(model, feature_set)] = params
        with open('saved_params.json', 'w') as f:
            json.dump(saved_params, f, indent=4, separators=(',', ': '),
                      ensure_ascii=True, sort_keys=True)
    else:
        params.update(saved_params.get(toString(model, feature_set), {}))
        if grid_search:
            print "using params %s: %s" % (toString(model, feature_set), params)

    params = convert(params)

    return params
Example #3
0
 def getSubtitles(self, providers, updateCB=None, title=None, filepath=None, langs=None, year=None, tvshow=None, season=None, episode=None, timeout=10):
     """
     Query the given providers (in parallel when there is more than one)
     and return a dict of found subtitles, filled in by _searchSubtitles.

     providers -- list of provider instances or provider-id strings
     timeout -- socket default timeout (seconds) applied for the search
     """
     self.log.info('getting subtitles list - title: %s, filepath: %s, year: %s, tvshow: %s, season: %s, episode: %s' % (
         toString(title), toString(filepath), toString(year), toString(tvshow), toString(season), toString(episode)))
     subtitlesDict = {}
     threads = []
     # BUGFIX: remember the previous global timeout so it can be restored.
     # The original ended with setdefaulttimeout(getdefaulttimeout()), a
     # no-op that left `timeout` installed globally after this call.
     previous_timeout = socket.getdefaulttimeout()
     socket.setdefaulttimeout(timeout)
     lock = threading.Lock()
     try:
         if len(providers) == 1:
             provider = providers[0]
             if isinstance(provider, basestring):
                 provider = self.getProvider(providers[0])
             if provider.error is not None:
                 self.log.debug("provider '%s' has 'error' flag set, skipping...", provider)
                 return subtitlesDict
             else:
                 self._searchSubtitles(lock, subtitlesDict, updateCB, provider, title, filepath, langs, season, episode, tvshow, year)
         else:
             for provider in providers:
                 if isinstance(provider, basestring):
                     provider = self.getProvider(provider)
                 if provider.error is not None:
                     self.log.debug("provider '%s' has 'error' flag set, skipping...", provider)
                 else:
                     threads.append(threading.Thread(target=self._searchSubtitles, args=(lock, subtitlesDict, updateCB, provider, title, filepath, langs, season, episode, tvshow, year)))
             for t in threads:
                 t.setDaemon(True)
                 t.start()
             # poll until every worker thread has finished
             working = True
             while working:
                 working = False
                 time.sleep(0.5)
                 for t in threads:
                     working = working or t.is_alive()
     finally:
         # restore whatever default timeout was in effect before this call
         socket.setdefaulttimeout(previous_timeout)
     return subtitlesDict
Example #4
0
 def htmlToMd(self, title, url):
     page = utils.toString(self.requestUrl(url))
     print "-- article-- %s %s" % (url, title)
     if (page):
         soup = BeautifulSoup(page, 'html.parser')
         entry = utils.toString(soup.find_all('div', {'class': 'entry'})[0])
         print entry
         mdStr = utils.toString(md(entry))
         id = self.getArticleId(url)
         self.saveToFiles(id + "_" + utils.toString(title), mdStr)
    def fit(self, y, train):

        y_train = y
        X_train = train

        for model, hyperfeatures in self.models:
            if self.log != None:
                        print >> self.log, "Fitting [%s]" % (toString(model, hyperfeatures))
            else:
                print "Fitting [%s]"  % (toString(model, hyperfeatures))

            model_preds = model.fit(X_train, y_train)
    def fit(self, y, train):

        y_train = y
        X_train = train

        for model, hyperfeatures in self.models:
            if self.log != None:
                print >> self.log, "Fitting [%s]" % (toString(
                    model, hyperfeatures))
            else:
                print "Fitting [%s]" % (toString(model, hyperfeatures))

            model_preds = model.fit(X_train, y_train)
Example #7
0
 def rarSubNameCB(result, retval, extra_args):
     if retval == 0:
         print '[Unrar] getting rar sub name', result
         rarSubNames = result.split('\n')
         rarPath = extra_args[0]
         destDir = extra_args[1]
         try:
             for subName in rarSubNames:
                 os.unlink(os.path.join(destDir, subName))
         except OSError as e:
             print e
         # unrar needs rar Extension?
         if os.path.splitext(rarPath)[1] != '.rar':
             oldRarPath = rarPath
             rarPath = os.path.splitext(rarPath)[0] + '.rar'
             shutil.move(oldRarPath, rarPath)
         cmdRarUnpack = 'unrar e "%s" %s' % (rarPath, destDir)
         Console().ePopen(toString(cmdRarUnpack), rarUnpackCB, (tuple(rarSubNames),))
     else:
         try:
             os.unlink(extra_args[0])
         except OSError:
             pass
         print '[Unrar] problem when getting rar sub name:', result
         errorCB(_("unpack error: cannot get subname"))
Example #8
0
 def rarSubNameCB(result, retval, extra_args):
     if retval == 0:
         print '[Unrar] getting rar sub name', result
         rarSubNames = result.split('\n')
         rarPath = extra_args[0]
         destDir = extra_args[1]
         try:
             for subName in rarSubNames:
                 os.unlink(os.path.join(destDir, subName))
         except OSError as e:
             print e
         # unrar needs rar Extension?
         if os.path.splitext(rarPath)[1] != '.rar':
             oldRarPath = rarPath
             rarPath = os.path.splitext(rarPath)[0] + '.rar'
             shutil.move(oldRarPath, rarPath)
         cmdRarUnpack = 'unrar e "%s" %s' % (rarPath, destDir)
         Console().ePopen(toString(cmdRarUnpack), rarUnpackCB,
                          (tuple(rarSubNames), ))
     else:
         try:
             os.unlink(extra_args[0])
         except OSError:
             pass
         print '[Unrar] problem when getting rar sub name:', result
         errorCB(_("unpack error: cannot get subname"))
Example #9
0
    def getArticles(self, archiveEle):
        liEles = archiveEle.find_all('div', {'class': 'post floated-thumb'})
        liArticles = []
        for liEle in liEles:
            liArticleEles = liEle.select('.meta-title ')
            for articleEle in liArticleEles:
                url = articleEle['href']
                title = articleEle.contents[0]

                liArticles.append(
                    Article(utils.toString(title), utils.toString(url)))

            # tag
            liArticleTags = liEle.find_all('a', attrs={"rel": "category tag"})
            if liArticleTags:
                tag = liArticleTags[0].contents[0]
                print "tag:  %s" % tag
        for it in liArticles:
            print it.getTitle()
            print it.getLink()
        return liArticles
Example #10
0
 def _process_path(self, subfile, current_encoding=None):
     """Read a subtitles file, decode it and return (decoded_text, encoding)."""
     filename = os.path.basename(subfile)
     size = getFileSize(subfile)
     # reject files that are clearly too large to be subtitles
     if size and size > SUBTITLES_FILE_MAX_SIZE:
         self.log.error("<%s> not supported subtitles size ({%d}KB > {%d}KB)!", filename, size / 1024, SUBTITLES_FILE_MAX_SIZE / 1024)
         raise LoadError('"%s" - not supported subtitles size: "%dKB"' % (toString(os.path.basename(subfile)), size / 1024))
     try:
         raw = load(subfile)
     except (URLError, HTTPError, IOError) as e:
         self.log.error("<%s> %s", filename, str(e))
         raise LoadError(subfile)
     try:
         decoded, used_encoding = decode(raw, self._encodings, current_encoding)
     except Exception:
         self.log.error("<%s> %s", filename, "cannot decode")
         raise DecodeError(subfile)
     return decoded, used_encoding
Example #11
0
 def _process_path(self, subfile, current_encoding=None):
     """Load *subfile*, decode its contents and return a (text, encoding) pair."""
     name = os.path.basename(subfile)
     filesize = getFileSize(subfile)
     # guard against files too big to be real subtitles
     oversized = filesize and filesize > SUBTITLES_FILE_MAX_SIZE
     if oversized:
         self.log.error(
             "<%s> not supported subtitles size ({%d}KB > {%d}KB)!",
             name, filesize / 1024, SUBTITLES_FILE_MAX_SIZE / 1024)
         raise LoadError('"%s" - not supported subtitles size: "%dKB"' %
                         (toString(os.path.basename(subfile)), filesize / 1024))
     try:
         payload = load(subfile)
     except (URLError, HTTPError, IOError) as e:
         self.log.error("<%s> %s", name, str(e))
         raise LoadError(subfile)
     try:
         decoded_text, encoding = decode(payload, self._encodings,
                                         current_encoding)
     except Exception:
         self.log.error("<%s> %s", name, "cannot decode")
         raise DecodeError(subfile)
     return decoded_text, encoding
Example #12
0
 def downloadSubtitle(self, selected_subtitle, subtitles_dict, choosefile_cb, path=None, fname=None, overwrite_cb=None, settings=None):
     """
     Download a previously found subtitle and move it to its final location.

     selected_subtitle -- subtitle entry (dict) picked from subtitles_dict
     subtitles_dict -- provider_id -> {'list': [...], 'params': {...}} mapping
     choosefile_cb -- called with a list of files when an archive holds
         several subtitles; returning None cancels the download
     path -- optional directory overriding self.download_path
     fname -- optional target file name (without extension)
     overwrite_cb -- called when the destination exists; None keeps the temp
         file, False keeps it too, True overwrites
     settings -- optional dict with 'save_as' and 'lang_to_filename' options
     Returns the final path, the temporary path when moving failed or was
     declined, or None when the user cancelled the file choice.
     """
     self.log.info('downloading subtitle "%s" with settings "%s"' % (selected_subtitle['filename'], toString(settings) or {}))
     if settings is None:
         settings = {}
     seeker = None
     for provider_id in subtitles_dict.keys():
         if selected_subtitle in subtitles_dict[provider_id]['list']:
             seeker = self.getProvider(provider_id)
             break
     if seeker is None:
         self.log.error('provider for "%s" subtitle was not found', selected_subtitle['filename'])
         # BUGFIX: the original only logged and then crashed with
         # AttributeError on seeker.download(); raise a meaningful error
         raise SubtitlesDownloadError(msg="[error] provider for subtitle was not found")
     lang, filepath = seeker.download(subtitles_dict[provider_id], selected_subtitle)[1:3]
     compressed = getCompressedFileType(filepath)
     if compressed:
         subfiles = self._unpack_subtitles(filepath, self.tmp_path)
     else:
         subfiles = [filepath]
     subfiles = [toString(s) for s in subfiles]
     if len(subfiles) == 0:
         self.log.error("no subtitles were downloaded!")
         raise SubtitlesDownloadError(msg="[error] no subtitles were downloaded")
     elif len(subfiles) == 1:
         self.log.debug('found one subtitle: "%s"', str(subfiles))
         subfile = subfiles[0]
     else:
         self.log.debug('found more subtitles: "%s"', str(subfiles))
         subfile = choosefile_cb(subfiles)
         if subfile is None:
             self.log.debug('no subtitles file choosed!')
             return
         self.log.debug('selected subtitle: "%s"', subfile)
     # work out a usable extension: temp file, then original name, then .srt
     ext = os.path.splitext(subfile)[1]
     if ext not in self.SUBTILES_EXTENSIONS:
         ext = os.path.splitext(toString(selected_subtitle['filename']))[1]
         if ext not in self.SUBTILES_EXTENSIONS:
             ext = '.srt'
     if fname is None:
         filename = os.path.basename(subfile)
         save_as = settings.get('save_as', 'default')
         if save_as == 'version':
             self.log.debug('filename creating by "version" setting')
             filename = toString(selected_subtitle['filename'])
             if os.path.splitext(filename)[1] not in self.SUBTILES_EXTENSIONS:
                 filename = os.path.splitext(filename)[0] + ext
         elif save_as == 'video':
             self.log.debug('filename creating by "video" setting')
             videopath = toString(subtitles_dict[seeker.id]['params'].get('filepath'))
             filename = os.path.splitext(os.path.basename(videopath))[0] + ext

         if settings.get('lang_to_filename', False):
             lang_iso639_1_2 = toString(languageTranslate(lang, 0, 2))
             self.log.debug('appending language "%s" to filename', lang_iso639_1_2)
             filename, ext = os.path.splitext(filename)
             filename = "%s.%s%s" % (filename, lang_iso639_1_2, ext)
     else:
         self.log.debug('using provided filename')
         filename = toString(fname) + ext
     self.log.debug('filename: "%s"', filename)
     download_path = os.path.join(toString(self.download_path), filename)
     if path is not None:
         self.log.debug('using custom download path: "%s"', path)
         download_path = os.path.join(toString(path), filename)
     self.log.debug('download path: "%s"', download_path)
     if os.path.isfile(download_path) and overwrite_cb is not None:
         ret = overwrite_cb(download_path)
         if ret is None:
             self.log.debug('overwrite cancelled, returning temp path')
             return subfile
         elif not ret:
             self.log.debug('not overwriting, returning temp path')
             return subfile
         elif ret:
             self.log.debug('overwriting')
             try:
                 shutil.move(subfile, download_path)
                 return download_path
             except Exception as e:
                 # BUGFIX: the original %-formatted 3 placeholders with only
                 # 2 arguments, raising TypeError inside the error path; use
                 # the logger's lazy formatting with all three arguments
                 self.log.error('moving "%s" to "%s" - %s',
                                os.path.split(subfile)[-2:],
                                os.path.split(download_path)[-2:], str(e))
                 return subfile
     try:
         shutil.move(subfile, download_path)
     except Exception as e:
         # BUGFIX: pass the three format arguments separately instead of a
         # 2-tuple plus str(e), which broke the logger's %-substitution
         self.log.error('moving "%s" to "%s" - %s',
                        os.path.split(subfile)[-2:],
                        os.path.split(download_path)[-2:], str(e))
         return subfile
     return download_path
    def predict(self, y, train, predict, y_test, show_steps=True):
        """
        Run every base model on the prediction set, optionally report
        per-model metrics, and combine the stage-0 predictions into the
        final prediction.

        Returns (selected_preds, models_score, models_f1).
        NOTE(review): the parameter `predict` shadows the method name inside
        the body; it holds the prediction feature matrix.
        """
        # per-model cross-validated train predictions / test predictions
        stage0_train = []
        stage0_predict = []

        # running metric collections for each base model
        models_score = []
        means_score = []
        stacks_score = []

        models_f1 = []
        means_f1 = []
        stacks_f1 = []

        y_train = y
        X_train = train
        X_predict = predict

        for model, hyperfeatures in self.models:

            model_preds = self._get_model_preds(model, X_predict)
            model_score = self._get_model_score(model, X_predict, y_test)
            stage0_predict.append(model_preds)

            # if stacking, compute cross-validated predictions on the train set
            if self.stack:
                model_cv_preds = self._get_model_cv_preds(model, X_train, y_train)
                stage0_train.append(model_cv_preds)

            # verbose mode: compute metrics after every model computation
            if show_steps:
                    mean_preds, stack_preds, fwls_preds = self._combine_preds(
                        np.array(stage0_train).T, np.array(stage0_predict).T,
                        y_train, train, predict,
                        stack=self.stack, fwls=self.fwls)

                    #model_auc = compute_auc(y_test, stage0_predict[-1])
                    #mean_auc = compute_auc(y_test, mean_preds)
                    #stack_auc = compute_auc(y_test, stack_preds) \
                    #    if self.stack else 0
                    #fwls_auc = compute_auc(y_test, fwls_preds) \
                    #    if self.fwls else 0
                    #
                    #if self.log != None:
                    #    print >> self.log, "> AUC: %.4f (%.4f, %.4f, %.4f) [%s]" % (model_auc, mean_auc, stack_auc, fwls_auc, toString(model, hyperfeatures))
                    #else:
                    #    print "> AUC: %.4f (%.4f, %.4f, %.4f) [%s]" % (model_auc, mean_auc, stack_auc, fwls_auc, toString(model, hyperfeatures))

                    # binarize the continuous predictions, then score them
                    model_preds_bin, mean_preds_bin, stack_preds_bin = self._binary_preds(model_preds, mean_preds, stack_preds)
                    model_score = compute_score(y_test, model_preds_bin)
                    mean_score = compute_score(y_test, mean_preds_bin)
                    stack_score = compute_score(y_test, stack_preds_bin) \
                        if self.stack else 0
                    models_score.append(model_score)
                    means_score.append(mean_score)
                    # conditional expression: appends only when stacking
                    stacks_score.append(stack_score) \
                        if self.stack else 0

                    if self.log != None:
                        print >> self.log, "> Score: %.4f (%.4f, %.4f) [%s]" % (model_score, mean_score, stack_score, toString(model, hyperfeatures))
                    else:
                        print "> Score: %.4f (%.4f, %.4f) [%s]" % (model_score, mean_score, stack_score, toString(model, hyperfeatures))

                    model_f1 = compute_f1_score(y_test, model_preds_bin)
                    mean_f1 = compute_f1_score(y_test, mean_preds_bin)
                    stack_f1 = compute_f1_score(y_test, stack_preds_bin) \
                        if self.stack else 0
                    models_f1.append(model_f1)
                    means_f1.append(mean_f1)
                    stacks_f1.append(stack_f1) \
                        if self.stack else 0

                    if self.log != None:
                        print >> self.log, "> F1: %.4f (%.4f, %.4f) [%s]" % (model_f1, mean_f1, stack_f1, toString(model, hyperfeatures))
                    else:
                        print "> F1: %.4f (%.4f, %.4f) [%s]" % (model_f1, mean_f1, stack_f1, toString(model, hyperfeatures))

        if self.model_selection and predict is not None:

            # keep only the subset of base models with the best combined score
            #best_subset = self._find_best_auc_subset(y_test, stage0_predict)
            best_subset = self._find_best_score_subset(y_test, stage0_predict)

            stage0_train = [pred for i, pred in enumerate(stage0_train)
                            if i in best_subset]

            stage0_predict = [pred for i, pred in enumerate(stage0_predict)
                              if i in best_subset]

        mean_preds, stack_preds, fwls_preds = self._combine_preds(
            np.array(stage0_train).T, np.array(stage0_predict).T,
            y_train, stack=self.stack, fwls=self.fwls)

        # final choice: fwls > stack > plain mean, depending on configuration
        if self.stack:
            selected_preds = stack_preds if not self.fwls else fwls_preds
        else:
            selected_preds = mean_preds

        return selected_preds, models_score, models_f1
Example #14
0
def main(argv: List[str]):
    """Extrude each OBJ-style file named in *argv* by --distance along the
    surface normal, writing the result to <name>_extruded<ext>."""
    distance = 0
    try:
        opts, args = getopt.getopt(argv, "d:", ['help', 'distance='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == '--help':
            usage()
            sys.exit()
        elif opt in ('-d', '--distance'):
            try:
                distance = float(arg)
            except ValueError:
                print("Distance '" + arg +
                      "' is not a valid number (int or float)")
                sys.exit(2)

    for filename in args:
        try:
            with open(filename) as src:
                lines = src.readlines()
        except FileNotFoundError as fne:
            print(fne.strerror + ": '" + filename + "'")
            sys.exit(2)

        vertices, faces, info = [], [], []
        for line in lines:
            if line[0] == 'v':
                vertices.append(utils.toVertex(line))
            elif line[0] == 'f':
                faces.append(utils.toFace(line))
            else:
                info.append(line)

        n_vertices = len(vertices)
        n_faces = len(faces)

        # Step 1: append a copy of every vertex shifted by distance * normal
        normal = [component * distance
                  for component in utils.normalVector(vertices, faces)]
        for vertex in vertices[:n_vertices]:
            vertices.append([v + n_i for (v, n_i) in zip(vertex, normal)])

        # Step 2: build the list of boundary edges of the original surface
        per_face_edges = [[(face[0], face[1]), (face[1], face[2]),
                           (face[2], face[0])] for face in faces]
        edges = [edge for triple in per_face_edges for edge in triple]
        boundary_edges = utils.getBoundaryEdges(edges)

        # Step 3: create the extruded counterpart of every face
        for face in faces[:n_faces]:
            faces.append([v + n_vertices for v in face])

        # Step 4: close the sides — two triangles per boundary edge
        for edge in boundary_edges:
            faces.append([edge[0], edge[1], edge[1] + n_vertices])
            faces.append([edge[1] + n_vertices, edge[0] + n_vertices, edge[0]])

        # Step 5: invert the normal vector of the original faces
        for face in faces[:n_faces]:
            face.reverse()

        base, ext = os.path.splitext(filename)
        with open(base + '_extruded' + ext, 'w') as out:
            out.write(''.join(info))
            out.write(''.join(utils.toString(vertices, 'v')))
            out.write(''.join(utils.toString(faces, 'f')))
Example #15
0
def main(argv: List[str]):
    """
    Rotate ('lathe') the polylines of each input file around an axis.

    Options:
      -s/--steps     number of rotation steps (positive int, required)
      -a/--angle     total rotation angle
      -r/--rotation  rotation axis: 'x', 'y' or 'z' (default 'z')
    Writes the generated surface to <name>_lathe<ext>.
    """
    steps = 0
    angle = 0
    rotation = 'z'
    try:
        opts, args = getopt.getopt(argv, "s:a:r:",
                                   ['help', 'steps=', 'angle=', 'rotation='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == '--help':
            usage()
            sys.exit()
        elif opt in ('-s', '--steps'):
            try:
                steps = int(arg)
            except ValueError:
                print("Steps '" + arg + "' is not a valid number (int)")
                sys.exit(2)
        elif opt in ('-a', '--angle'):
            try:
                angle = float(arg)
            except ValueError:
                print("Angle '" + arg +
                      "' is not a valid number (int or float)")
                sys.exit(2)
        elif opt in ('-r', '--rotation'):
            if arg in ('x', 'X', ' x', ' X'):
                rotation = 'x'
            elif arg in ('y', 'Y', ' y', ' Y'):
                rotation = 'y'

    # BUGFIX: steps defaults to 0, so a missing or zero -s option crashed
    # with ZeroDivisionError below; validate it like the other options
    if steps <= 0:
        print("Steps '" + str(steps) + "' is not a valid number (int)")
        sys.exit(2)

    angle_per_step = angle / steps

    for file in args:
        try:
            f = open(file)
        except FileNotFoundError as fne:
            print(fne.strerror + ": '" + file + "'")
            sys.exit(2)

        lines = f.readlines()
        f.close()

        vertices = []
        ls = []
        info = []
        for line in lines:
            if line[0] == 'v':
                vertices.append(utils.toVertex(line))
            elif line[0] == 'l':
                ls.append(utils.toFace(line))
            else:
                info.append(line)

        n_vertices = len(vertices)
        n_ls = len(ls)

        faces = []
        # each step rotates the previous ring of vertices and stitches the
        # new ring to the old one with two triangles per line segment
        for s in range(steps):
            for i in range(n_vertices * s, n_vertices * (s + 1)):
                vertices.append(
                    utils.rotate(vertices[i], -angle_per_step, rotation))

            for i in range(n_ls * s, n_ls * (s + 1)):
                ls.append([ls[i][0] + n_vertices, ls[i][1] + n_vertices])
                faces.append([ls[i][0] + n_vertices, ls[i][1], ls[i][0]])
                faces.append(
                    [ls[i][0] + n_vertices, ls[i][1] + n_vertices, ls[i][1]])

        new_file = os.path.splitext(file)
        with open(new_file[0] + '_lathe' + new_file[1], 'w') as f:
            f.write(''.join(info))
            f.write(''.join(utils.toString(vertices, 'v')))
            f.write(''.join(utils.toString(faces, 'f')))
    }
    ) {
        id
        date
        dailyVolumeETH
        dailyVolumeUSD
        dailyVolumeToken
		priceUSD
    }
}
""" % (token_id, start_date, end_date)
r = requests.post(url, json={'query': query})
json_data = json.loads(r.text)

# write data file
filename = 'data/TokenDayData_{}_{}.csv'.format(
    token_symbol,
    datetime.datetime.now().strftime('%Y.%m.%d_%H:%M:%S'))
with open(filename, mode='w') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(
        ['날짜', '토큰', '거래량(ETH)', '거래량(USD)', '거래량(token)', 'USD가격'])
    for data in json_data.get('data').get('tokenDayDatas'):
        csv_writer.writerow([
            toString(data.get('date')), token_name,
            data.get('dailyVolumeETH'),
            data.get('dailyVolumeUSD'),
            data.get('dailyVolumeToken'),
            data.get('priceUSD')
        ])
    def predict(self, y, train, predict, y_test, show_steps=True):
        """
        Run every base model on the prediction set, optionally report
        per-model metrics, and combine the stage-0 predictions into the
        final prediction.

        Returns (selected_preds, models_score, models_f1).
        NOTE(review): the parameter `predict` shadows the method name inside
        the body; it holds the prediction feature matrix.
        """
        # per-model cross-validated train predictions / test predictions
        stage0_train = []
        stage0_predict = []

        # running metric collections for each base model
        models_score = []
        means_score = []
        stacks_score = []

        models_f1 = []
        means_f1 = []
        stacks_f1 = []

        y_train = y
        X_train = train
        X_predict = predict

        for model, hyperfeatures in self.models:

            model_preds = self._get_model_preds(model, X_predict)
            model_score = self._get_model_score(model, X_predict, y_test)
            stage0_predict.append(model_preds)

            # if stacking, compute cross-validated predictions on the train set
            if self.stack:
                model_cv_preds = self._get_model_cv_preds(
                    model, X_train, y_train)
                stage0_train.append(model_cv_preds)

            # verbose mode: compute metrics after every model computation
            if show_steps:
                mean_preds, stack_preds, fwls_preds = self._combine_preds(
                    np.array(stage0_train).T,
                    np.array(stage0_predict).T,
                    y_train,
                    train,
                    predict,
                    stack=self.stack,
                    fwls=self.fwls)

                #model_auc = compute_auc(y_test, stage0_predict[-1])
                #mean_auc = compute_auc(y_test, mean_preds)
                #stack_auc = compute_auc(y_test, stack_preds) \
                #    if self.stack else 0
                #fwls_auc = compute_auc(y_test, fwls_preds) \
                #    if self.fwls else 0
                #
                #if self.log != None:
                #    print >> self.log, "> AUC: %.4f (%.4f, %.4f, %.4f) [%s]" % (model_auc, mean_auc, stack_auc, fwls_auc, toString(model, hyperfeatures))
                #else:
                #    print "> AUC: %.4f (%.4f, %.4f, %.4f) [%s]" % (model_auc, mean_auc, stack_auc, fwls_auc, toString(model, hyperfeatures))

                # binarize the continuous predictions, then score them
                model_preds_bin, mean_preds_bin, stack_preds_bin = self._binary_preds(
                    model_preds, mean_preds, stack_preds)
                model_score = compute_score(y_test, model_preds_bin)
                mean_score = compute_score(y_test, mean_preds_bin)
                stack_score = compute_score(y_test, stack_preds_bin) \
                    if self.stack else 0
                models_score.append(model_score)
                means_score.append(mean_score)
                # conditional expression: appends only when stacking
                stacks_score.append(stack_score) \
                    if self.stack else 0

                if self.log != None:
                    print >> self.log, "> Score: %.4f (%.4f, %.4f) [%s]" % (
                        model_score, mean_score, stack_score,
                        toString(model, hyperfeatures))
                else:
                    print "> Score: %.4f (%.4f, %.4f) [%s]" % (
                        model_score, mean_score, stack_score,
                        toString(model, hyperfeatures))

                model_f1 = compute_f1_score(y_test, model_preds_bin)
                mean_f1 = compute_f1_score(y_test, mean_preds_bin)
                stack_f1 = compute_f1_score(y_test, stack_preds_bin) \
                    if self.stack else 0
                models_f1.append(model_f1)
                means_f1.append(mean_f1)
                stacks_f1.append(stack_f1) \
                    if self.stack else 0

                if self.log != None:
                    print >> self.log, "> F1: %.4f (%.4f, %.4f) [%s]" % (
                        model_f1, mean_f1, stack_f1,
                        toString(model, hyperfeatures))
                else:
                    print "> F1: %.4f (%.4f, %.4f) [%s]" % (
                        model_f1, mean_f1, stack_f1,
                        toString(model, hyperfeatures))

        if self.model_selection and predict is not None:

            # keep only the subset of base models with the best combined score
            #best_subset = self._find_best_auc_subset(y_test, stage0_predict)
            best_subset = self._find_best_score_subset(y_test, stage0_predict)

            stage0_train = [
                pred for i, pred in enumerate(stage0_train) if i in best_subset
            ]

            stage0_predict = [
                pred for i, pred in enumerate(stage0_predict)
                if i in best_subset
            ]

        mean_preds, stack_preds, fwls_preds = self._combine_preds(
            np.array(stage0_train).T,
            np.array(stage0_predict).T,
            y_train,
            stack=self.stack,
            fwls=self.fwls)

        # final choice: fwls > stack > plain mean, depending on configuration
        if self.stack:
            selected_preds = stack_preds if not self.fwls else fwls_preds
        else:
            selected_preds = mean_preds

        return selected_preds, models_score, models_f1
Example #18
0
    def downloadSubtitle(self,
                         selected_subtitle,
                         subtitles_dict,
                         choosefile_cb,
                         path=None,
                         fname=None,
                         overwrite_cb=None,
                         settings=None):
        """Download a subtitle picked from search results and save it.

        Args:
            selected_subtitle: subtitle entry (dict) chosen from subtitles_dict.
            subtitles_dict: search results keyed by provider id; each value
                holds a 'list' of subtitle entries and the search 'params'.
            choosefile_cb: callback used to pick one file when the download
                contains more than one subtitle file.
            path: optional download directory overriding self.download_path.
            fname: optional target filename (extension is added here).
            overwrite_cb: optional callback invoked when the target file
                already exists; truthy -> overwrite, falsy -> keep temp file,
                None -> cancel.
            settings: optional dict read for 'save_as' and 'lang_to_filename'.

        Returns:
            Path of the saved subtitle file, the temporary file path when the
            move failed or overwriting was declined, or None when no file was
            chosen by choosefile_cb.

        Raises:
            SubtitlesDownloadError: when no provider matches the subtitle or
                no subtitle file was downloaded.
        """
        # Default settings before logging so the log shows '{}', not 'None'.
        if settings is None:
            settings = {}
        self.log.info(
            'downloading subtitle "%s" with settings "%s"' %
            (selected_subtitle['filename'], toString(settings)))
        seeker = None
        for provider_id in subtitles_dict.keys():
            if selected_subtitle in subtitles_dict[provider_id]['list']:
                seeker = self.getProvider(provider_id)
                break
        if seeker is None:
            # Originally execution continued past this branch and crashed
            # with AttributeError on 'seeker.download'; fail explicitly.
            self.log.error('provider for "%s" subtitle was not found',
                           selected_subtitle['filename'])
            raise SubtitlesDownloadError(
                msg="[error] provider for subtitle was not found")
        lang, filepath = seeker.download(subtitles_dict[provider_id],
                                         selected_subtitle)[1:3]
        # Unpack archives into the temp dir, otherwise use the file as-is.
        compressed = getCompressedFileType(filepath)
        if compressed:
            subfiles = self._unpack_subtitles(filepath, self.tmp_path)
        else:
            subfiles = [filepath]
        subfiles = [toString(s) for s in subfiles]
        if len(subfiles) == 0:
            self.log.error("no subtitles were downloaded!")
            raise SubtitlesDownloadError(
                msg="[error] no subtitles were downloaded")
        elif len(subfiles) == 1:
            self.log.debug('found one subtitle: "%s"', str(subfiles))
            subfile = subfiles[0]
        else:
            self.log.debug('found more subtitles: "%s"', str(subfiles))
            subfile = choosefile_cb(subfiles)
            if subfile is None:
                self.log.debug('no subtitles file choosed!')
                return
            self.log.debug('selected subtitle: "%s"', subfile)
        # Work out the target extension, falling back to '.srt'.
        ext = os.path.splitext(subfile)[1]
        if ext not in self.SUBTILES_EXTENSIONS:
            ext = os.path.splitext(toString(selected_subtitle['filename']))[1]
            if ext not in self.SUBTILES_EXTENSIONS:
                ext = '.srt'
        if fname is None:
            filename = os.path.basename(subfile)
            save_as = settings.get('save_as', 'default')
            if save_as == 'version':
                # Name the file after the subtitle's release/version string.
                self.log.debug('filename creating by "version" setting')
                filename = toString(selected_subtitle['filename'])
                if os.path.splitext(
                        filename)[1] not in self.SUBTILES_EXTENSIONS:
                    filename = os.path.splitext(filename)[0] + ext
            elif save_as == 'video':
                # Name the file after the video the search was run for.
                self.log.debug('filename creating by "video" setting')
                videopath = toString(
                    subtitles_dict[seeker.id]['params'].get('filepath'))
                filename = os.path.splitext(
                    os.path.basename(videopath))[0] + ext

            if settings.get('lang_to_filename', False):
                lang_iso639_1_2 = toString(languageTranslate(lang, 0, 2))
                self.log.debug('appending language "%s" to filename',
                               lang_iso639_1_2)
                filename, ext = os.path.splitext(filename)
                filename = "%s.%s%s" % (filename, lang_iso639_1_2, ext)
        else:
            self.log.debug('using provided filename')
            filename = toString(fname) + ext
        self.log.debug('filename: "%s"', filename)
        download_path = os.path.join(toString(self.download_path), filename)
        if path is not None:
            self.log.debug('using custom download path: "%s"', path)
            download_path = os.path.join(toString(path), filename)
        self.log.debug('download path: "%s"', download_path)
        if os.path.isfile(download_path) and overwrite_cb is not None:
            ret = overwrite_cb(download_path)
            if ret is None:
                self.log.debug('overwrite cancelled, returning temp path')
                return subfile
            elif not ret:
                self.log.debug('not overwriting, returning temp path')
                return subfile
            else:
                self.log.debug('overwriting')
                try:
                    shutil.move(subfile, download_path)
                    return download_path
                except Exception as e:
                    # Was: eager '%' with a 2-tuple against 3 placeholders,
                    # which raised TypeError inside the handler; pass all
                    # three values lazily like the other log calls.
                    self.log.error('moving "%s" to "%s" - %s',
                                   os.path.split(subfile)[-2:],
                                   os.path.split(download_path)[-2:],
                                   str(e))
                    return subfile
        try:
            shutil.move(subfile, download_path)
        except Exception as e:
            self.log.error('moving "%s" to "%s" - %s',
                           os.path.split(subfile)[-2:],
                           os.path.split(download_path)[-2:],
                           str(e))
            return subfile
        return download_path
    def fit_predict(self, trainX, trainY, testX, testY):
        """

        """

        X_train, X_cv, y_train, y_cv = cross_validation.train_test_split(trainX, trainY, test_size=0.5, random_state=SEED)

        predict = []
        # === Combine Models === #
        # Do a linear combination using a cross_validated data split
        for model in self.models:
            model.fit(X_cv, y_cv) 
            preds_model = model.predict_proba(X_train)[:, 1]            
            predict.append(preds_model)

            model_auc = compute_auc(y_train, preds_model)
            print "> AUC: %.4f [%s]" % (model_auc, toString(model))


        preds = np.hstack(tuple(predict)).reshape(len(predict),len(predict[-1])).transpose()
        preds[preds>0.9999999]=0.9999999
        preds[preds<0.0000001]=0.0000001
        preds = -np.log((1-preds)/preds)
        modelEN1 = linear_model.LogisticRegression()
        modelEN1.fit(preds, y_train)
        print "modelEN1.coef %s" % (modelEN1.coef_)

        predict = []
        for model in self.models:
            model.fit(X_train, y_train) 
            preds_model = model.predict_proba(X_cv)[:, 1]
            predict.append(preds_model)  

            model_auc = compute_auc(y_cv, preds_model)
            print "> AUC: %.4f [%s]" % (model_auc, toString(model))

            
        preds = np.hstack(tuple(predict)).reshape(len(predict),len(predict[-1])).transpose()
        preds[preds>0.9999999]=0.9999999
        preds[preds<0.0000001]=0.0000001
        preds = -np.log((1-preds)/preds)
        modelEN2 = linear_model.LogisticRegression()
        modelEN2.fit(preds, y_cv)
        print "modelEN2.coef %s" % (modelEN2.coef_)

        model_coefs = []
        for index in range(len(modelEN1.coef_[0])):
            model_coefs.append(modelEN1.coef_[0][index] + modelEN2.coef_[0][index])
            
    
    

        # === Predictions === #
        # When making predictions, retrain the model on the whole training set
        predict = []
        index = 0
        final_preds = np.zeros((testX.shape[0], ))
        
        
        for model in self.models:
            model.fit(trainX, trainY)
            preds_model = model.predict_proba(testX)[:, 1]
            preds_model[preds_model>0.9999999]=0.9999999
            preds_model[preds_model<0.0000001]=0.0000001
            preds_model = -np.log((1-preds_model)/preds_model)
            predict.append(preds_model)

            temp = model_coefs[index] * preds_model
            final_preds = final_preds + model_coefs[index] * preds_model

            index = index + 1

        mean_auc = compute_auc(testY, final_preds)

        print "> AUC: %.4f " % (mean_auc)
Example #20
0
    uniswapDayDatas(
    where: {
        date_gte: %s
        date_lte: %s
    }
    ) {
        id
        date
        dailyVolumeETH
        dailyVolumeUSD
        txCount
    }
}
""" % (start_date, end_date)

# Run the GraphQL query against the subgraph and decode the JSON payload.
r = requests.post(url, json={'query': query})
json_data = json.loads(r.text)

# write data file
filename = 'data/UniswapDayData_{}.csv'.format(
    datetime.datetime.now().strftime('%Y.%m.%d_%H:%M:%S'))
with open(filename, mode='w') as csv_file:
    csv_writer = csv.writer(csv_file)
    # Header (Korean labels: date, volume ETH, volume USD, tx count).
    csv_writer.writerow(['날짜', '거래량(ETH)', '거래량(USD)', 'transaction 수'])
    # One CSV row per daily entry returned by the query.
    rows = (
        [toString(entry.get('date')),
         entry.get('dailyVolumeETH'),
         entry.get('dailyVolumeUSD'),
         entry.get('txCount')]
        for entry in json_data.get('data').get('uniswapDayDatas')
    )
    csv_writer.writerows(rows)