Ejemplo n.º 1
0
def sub_from_dir(feat_dir, clfs, fnames, save_path=None, dist_sub=None, fillna=False,
                 maxes=None):
    """Predict on every ``*.mp`` feature chunk in ``feat_dir`` and build one frame.

    Each chunk is loaded with ``pd.read_msgpack``, has its columns renamed via
    the flipped ``MASSIVE_RENAMER`` mapping, is optionally cleaned, and is then
    passed to ``make_pred_df``. The per-chunk predictions are concatenated, a
    ``class_99`` column computed by ``GenUnknown`` is added, and the result is
    handed to ``_save_sub`` on a best-effort basis.

    Args:
        feat_dir: Directory that is globbed for ``*.mp`` chunk files.
        clfs: Forwarded unchanged to ``make_pred_df``.
        fnames: Forwarded unchanged to ``make_pred_df``.
        save_path: Forwarded unchanged to ``_save_sub``.
        dist_sub: Optional reference passed to ``compare_subs`` together with
            each chunk's predictions; chunks whose distance exceeds 0.7 are
            printed.
        fillna: When true, missing values in each chunk are replaced with 0.
        maxes: When not ``None``, each chunk is passed through
            ``replacer(chunk, maxes)`` before predicting.

    Returns:
        The concatenated prediction frame, including ``class_99``. It is
        returned even when saving failed.

    Note:
        ``pd.concat`` raises if no chunk files were found.
    """
    preds = []
    # Keyed by chunk path. This used to be a list, so ``dists[path] = ...``
    # raised TypeError as soon as ``dist_sub`` was supplied.
    dists = {}
    chunk_paths = sorted(glob.glob(f'{feat_dir}/*.mp'))
    for path in tqdm_notebook(chunk_paths):
        test_feat_df = pd.read_msgpack(path).rename(columns=funcy.flip(MASSIVE_RENAMER))
        if fillna:
            test_feat_df = test_feat_df.fillna(0)
        if maxes is not None:
            test_feat_df = replacer(test_feat_df, maxes)
        preds_df = make_pred_df(clfs, fnames, test_feat_df.reset_index()).set_index(OBJECT_ID)
        preds.append(preds_df)
        if dist_sub is not None:
            dist = compare_subs(preds_df, dist_sub)
            dists[path] = dist
            if dist > .7:
                print(f'{path}: distance: {dist:.3f}')

    df = pd.concat(preds)
    class99 = GenUnknown(df)
    df['class_99'] = class99
    if OBJECT_ID in df.columns:
        df = df.set_index(OBJECT_ID)
    try:
        _save_sub(df, save_path)
    except Exception:
        # Saving is best-effort: the caller still gets the frame back.
        print('couldnt save!')
    return df
Ejemplo n.º 2
0
    def name(self):
        """Return this object's name, deriving and caching it on first use.

        When ``self._name`` is unset, it is resolved as follows:

        1. If ``self._scorer`` is set and is a
           ``sklearn.metrics.scorer._BaseScorer``, it is compared by equality
           against the values of ``sklearn.metrics.scorer.SCORERS``. A single
           match supplies the name. Several matches are logged at debug level
           and leave the name unset. With no match, the scorer's ``__name__``
           attribute is used if it has one.
        2. Otherwise, if ``self._description`` is set, it is looked up in the
           flipped ``SCORING_NAME_MAPPER``; descriptions not found there are
           lower-cased and have their spaces replaced by underscores.

        Note that the description is only consulted when no scorer is set.

        Raises:
            BalletError: if no name could be determined.
        """
        if self._name is None:
            if self._scorer is not None:
                # try from scorer
                if isinstance(self._scorer,
                              sklearn.metrics.scorer._BaseScorer):
                    scorers = sklearn.metrics.scorer.SCORERS
                    # Iterating the filtered mapping yields its keys, i.e.
                    # the registered scorer names.
                    matches = list(select_values(
                        lambda x: x == self._scorer, scorers))
                    if len(matches) == 1:
                        self._name = matches[0]
                    elif matches:
                        # unexpected; log the scorer itself, since the name
                        # is by definition still None at this point
                        logger.debug(
                            'Unexpectedly found multiple matches for scorer '
                            '{scorer!r}: {matches!r}'
                            .format(scorer=self._scorer, matches=matches))
                    elif hasattr(self._scorer, '__name__'):
                        # must be a custom scorer, try to get name
                        self._name = self._scorer.__name__
            elif self._description is not None:
                # try from description
                mapper = flip(SCORING_NAME_MAPPER)
                if self._description in mapper:
                    self._name = mapper[self._description]
                else:
                    # default formatting
                    self._name = '_'.join(self._description.lower().split(' '))

        if self._name is not None:
            return self._name
        else:
            raise BalletError('Could not get name from scorer')
Ejemplo n.º 3
0
def part2(numbers, end=30000000):
    """Return the number produced on turn ``end`` of the sequence seeded by ``numbers``.

    The starting numbers occupy the first ``len(numbers)`` turns. Every later
    turn looks at the number from the turn before it: if that number had never
    occurred earlier, the new number is 0; otherwise it is the distance in
    turns between its two most recent occurrences.

    Args:
        numbers: Non-empty sequence of starting numbers. Repeats are allowed.
        end: 1-based turn whose number is returned.

    Returns:
        The number at turn ``end``. When ``end`` falls inside the starting
        numbers, that starting number is returned.

    Raises:
        ValueError: if ``end`` is smaller than 1.
        IndexError: if ``numbers`` is empty.
    """
    if end < 1:
        raise ValueError('end must be a positive turn number, got {!r}'.format(end))
    if end <= len(numbers):
        return numbers[end - 1]

    # Most recent 0-based turn of every number, excluding the final starting
    # number: it is recorded only once its successor is being computed, which
    # is how "has it occurred before?" stays answerable with a single dict.
    last_seen = {number: turn for turn, number in enumerate(numbers[:-1])}
    current = numbers[-1]

    # ``turn`` is the 0-based turn on which ``current`` was produced.
    for turn in range(len(numbers) - 1, end - 1):
        previous_turn = last_seen.get(current)
        last_seen[current] = turn
        current = 0 if previous_turn is None else turn - previous_turn

    return current
Ejemplo n.º 4
0
 def values(self, *fields, **expressions):
     """
     ``values()`` with support for renames given as string keyword arguments:

         .values('id', 'name', author__name='author')

     For every keyword whose value is a string, the keyword is the key used
     in the output and the string is the field to read. Without any such
     keyword the call is passed straight to the base implementation.
     """
     alias_to_field = select_values(isa(six.string_types), expressions)

     if alias_to_field and django.VERSION < (1, 11):
         # Older Django: fetch the source fields as plain positional fields
         # and rename the keys of each resulting row afterwards.
         field_to_alias = flip(alias_to_field)

         def rename(row):
             return {field_to_alias.get(key, key): val for key, val in row.items()}

         return base.values(self, *chain(fields, field_to_alias)).map(rename)

     if alias_to_field:
         # Django 1.11+: express each rename as an F() expression.
         expressions.update(walk_values(F, alias_to_field))
     return base.values(self, *fields, **expressions)
Ejemplo n.º 5
0
        except ValueError:
            continue
        for target in node.targets:
            if isinstance(target, ast.Name):
                yield target.id, value


# AST helpers

def get_ast(func):
    """Parse the source of ``func`` and return its first top-level AST node.

    The dedented source is prefixed with blank lines so that line numbers in
    the resulting tree match those in the file ``func`` was defined in.
    """
    lines, first_lineno = inspect.getsourcelines(func)
    padding = '\n' * (first_lineno - 1)
    code = padding + textwrap.dedent(''.join(lines))
    tree = ast.parse(code, inspect.getfile(func), 'single')
    return tree.body[0]

# Identifier spellings of the built-in constants, mapped to the objects they denote.
NAMED_CONSTS = {'None': None, 'True': True, 'False': False}
# Inverse lookup (constant object -> its spelling), built by flipping NAMED_CONSTS.
CONST_NAMES = flip(NAMED_CONSTS)


def is_literal(node):
    """Tell whether ``node`` is a literal AST node.

    A node counts as a literal when it is:

    * a string or number (``ast.Constant`` holding a ``str``, ``int``,
      ``float`` or ``complex`` on parsers that emit it; ``ast.Str`` /
      ``ast.Num`` on older ones),
    * an ``ast.Name`` whose id is a key of ``NAMED_CONSTS``,
    * a list or tuple whose elements are all literals, or
    * a dict whose keys and values are all literals.

    ``ast.Str`` and ``ast.Num`` are looked up defensively because they are
    deprecated aliases that no longer exist on the newest Python versions.
    """
    constant_cls = getattr(ast, 'Constant', None)
    if constant_cls is not None and isinstance(node, constant_cls):
        # Same set the deprecated Str/Num aliases accept: text and numbers,
        # but not bool (an int subclass), bytes, None or Ellipsis.
        value = node.value
        return (isinstance(value, (str, int, float, complex))
                and not isinstance(value, bool))
    if isinstance(node, ast.Name):
        return node.id in NAMED_CONSTS
    if isinstance(node, (ast.List, ast.Tuple)):
        return all(is_literal(item) for item in node.elts)
    if isinstance(node, ast.Dict):
        return all(is_literal(item) for item in node.keys + node.values)
    # Parsers that predate ast.Constant produce dedicated Str/Num nodes.
    legacy_types = tuple(
        cls for cls in (getattr(ast, 'Str', None), getattr(ast, 'Num', None))
        if cls is not None
    )
    return isinstance(node, legacy_types)


def ast_eval(node):
    """
    Faster ast.literal_eval() with better error messages.
    Works only with nodes not strings.
    """
    if isinstance(node, ast.Num):
Ejemplo n.º 6
0
from ipykernel.kernelapp import IPKernelApp
from IPython.display import Markdown, display, HTML
from IPython.core.interactiveshell import InteractiveShell
def in_notebook():
    """Return ``IPKernelApp.initialized()``, used as the running-in-a-kernel check."""
    return IPKernelApp.initialized()
def printmd(string):
    """Display ``string`` as rendered Markdown."""
    rendered = Markdown(string)
    display(rendered)
import warnings

# Inject CSS that sets elements of class `container` to 90% width.
display(HTML("<style>.container { width:90% !important; }</style>"))

# pretty print only the last output of the cell
# InteractiveShell.ast_node_interactivity = "last_expr" # "all" for all
# Optional extras: import them when installed. The first missing package
# aborts the remaining imports in this group.
try:
    with warnings.catch_warnings():
        # catch_warnings() only saves and restores the filter state; the
        # filter set here is what actually silences import-time warnings.
        warnings.simplefilter("ignore")
        import eli5
        from lightgbm.sklearn import LGBMClassifier, LGBMRegressor
        import seaborn as sns
        from PIL import Image
except ImportError:
    # Deliberately best-effort: these packages are conveniences, not requirements.
    pass

# Convenience accessors attached to existing classes.

# Path.ls(): sorted directory listing.
Path.ls = lambda self: sorted(self.iterdir())
# ParameterGrid.l: the grid materialized as a list.
ParameterGrid.l = property(lambda grid: list(grid))
# Series.flipped_dict: the series as a dict with keys and values swapped.
pd.Series.flipped_dict = property(lambda series: funcy.flip(series.to_dict()))

# Sorting shortcuts shared by DataFrame and Series.
for _pandas_cls in (pd.DataFrame, pd.Series):
    _pandas_cls.dsort = descending_sort
    _pandas_cls.asort = ascending_sort

# IPython magic: define `%h` as an alias of `%history` with the parameters "-l 20 -u -g" pre-applied.
%alias_magic h history -p "-l 20 -u -g"
Ejemplo n.º 7
0
def get_etf_ticker_for_sector(sector):
    """Return the ETF ticker associated with ``sector``.

    The mapping returned by ``get_sector_etf_ticker_map()`` is flipped so that
    its values become the lookup keys; ``sector`` must therefore be one of
    that mapping's values, otherwise the lookup fails.
    """
    ticker_by_sector = funcy.flip(get_sector_etf_ticker_map())
    return ticker_by_sector[sector]