Example #1
    def test_apply(self):
        assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts))

        # elementwise-apply
        import math

        assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts))

        # how to handle Series result, #2316
        result = self.ts.apply(lambda x: Series([x, x ** 2], index=["x", "x^2"]))
        expected = DataFrame({"x": self.ts, "x^2": self.ts ** 2})
        tm.assert_frame_equal(result, expected)

        # empty series
        s = Series(dtype=object, name="foo", index=pd.Index([], name="bar"))
        rs = s.apply(lambda x: x)
        tm.assert_series_equal(s, rs)
        # check all metadata (GH 9322)
        self.assertIsNot(s, rs)
        self.assertIs(s.index, rs.index)
        self.assertEqual(s.dtype, rs.dtype)
        self.assertEqual(s.name, rs.name)

        # index but no data
        s = Series(index=[1, 2, 3])
        rs = s.apply(lambda x: x)
        tm.assert_series_equal(s, rs)
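The Series-returning branch above (GH #2316) is worth calling out: when the applied function returns a Series, Series.apply expands the result into a DataFrame with one column per index entry. A minimal standalone sketch of that behavior, outside the test harness:

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0])
out = s.apply(lambda x: pd.Series([x, x ** 2], index=["x", "x^2"]))
print(out)  # a DataFrame with columns "x" and "x^2"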
Example #2
    def test_apply_same_length_inference_bug(self):
        s = Series([1, 2])
        f = lambda x: (x, x + 1)

        result = s.apply(f)
        expected = s.map(f)
        assert_series_equal(result, expected)

        s = Series([1, 2, 3])
        result = s.apply(f)
        expected = s.map(f)
        assert_series_equal(result, expected)
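The inference bug this test guards against: a function returning a tuple should behave like map and produce a Series of tuples, not be expanded column-wise (only Series results are expanded). A quick demonstration:

import pandas as pd

s = pd.Series([1, 2])
print(s.apply(lambda x: (x, x + 1)))  # each element is the tuple (x, x + 1)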
Example #3
    def test_date_tz(self):
        # GH11757
        rng = pd.DatetimeIndex(["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz="US/Eastern")
        s = Series(rng)
        expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
        assert_series_equal(s.dt.date, expected)
        assert_series_equal(s.apply(lambda x: x.date()), expected)
Example #4
def rolling_mean(data, window, min_periods=1, center=False):
    """Compute a rolling mean.

    Parameters
    ----------
    data : DataFrame or Series
        If a DataFrame is passed, the rolling_mean is computed for all columns.
    window : int or string
        If an int is passed, window is the number of observations used for
        calculating the statistic, as defined by the function pd.rolling_mean().
        If a string is passed, it must be a frequency string, e.g. '90S'. This is
        internally converted into a DateOffset object representing the window size.
    min_periods : int
        Minimum number of observations in window required to have a value.

    Returns
    -------
    Series, or DataFrame if more than one column
    """
    if len(data) < 2:
        return data

    def f(x):
        """Function to apply that actually computes the rolling mean"""
        offset = pd.datetools.to_offset(window)

        if not center:
            # add a microsecond because label-based slicing includes both endpoints
            dslice = col[x - offset.delta + timedelta(0, 0, 1) : x]
        else:
            dslice = col[x - offset.delta / 2 + timedelta(0, 0, 1) : x + offset.delta / 2]
        if dslice.size < min_periods:
            return np.nan
        else:
            return dslice.mean()

    data = DataFrame(data.copy())
    dfout = DataFrame()
    if isinstance(window, int):
        dfout = pd.rolling_mean(data, window, min_periods=min_periods, center=center)
    elif isinstance(window, basestring):
        idx = Series(pd.to_datetime(data.index), index=data.index)
        for colname, col in data.iterkv():
            result = idx.apply(f)
            result.name = colname
            dfout = dfout.join(result, how="outer")
    if dfout.columns.size == 1:
        dfout = dfout.ix[:, 0]
    return dfout
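This helper predates DataFrame.rolling and leans on Python-2-era pandas APIs (pd.rolling_mean, pd.datetools, iterkv, basestring, .ix), so it only runs as written on old versions. A usage sketch on assumed data, plus the modern equivalent of the string-window branch:

import numpy as np
import pandas as pd

idx = pd.date_range("2015-01-01", periods=100, freq="30s")
ser = pd.Series(np.random.randn(100), index=idx)

smoothed = rolling_mean(ser, window="90S")  # time-based window via the helper above

# since pandas 0.19 a frequency string can be passed directly:
smoothed_modern = ser.rolling("90s", min_periods=1).mean()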
Example #5
    def _clean_description(self, element):
        """Clean up aggregated descriptions."""
        data = [x.getText() for x in element]
        data = Series(data)
        data = data.apply(lambda x: re.sub("\n", " ", x))
        # drop image-caption entries like "(sanded...", "(sealed..." and empty strings
        mask = data.apply(lambda x: not re.search(r"\(sanded|\(sealed|\(endgrain|\(curl|\(burl|^$", x))
        data = data[mask]

        def func(item):
            try:
                return list(re.search("(.*?):(.*)", item).groups())
            except AttributeError:  # re.search returned None: no "heading: content" match
                return [item, None]

        data = data.apply(func).tolist()
        data = DataFrame(data, columns=["heading", "content"])

        mask = data.content.notnull()
        if mask.shape[0] > 0:
            mask.iloc[0] = True  # always keep the first row
        data = data[mask]
        return data
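The (.*?):(.*) pattern in func above splits "heading: content" strings into two-element lists; a quick illustration on a made-up description line:

import re

m = re.search("(.*?):(.*)", "Color/Appearance: heartwood is reddish brown")
print(list(m.groups()))  # ['Color/Appearance', ' heartwood is reddish brown']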
Example #6
def convert_series_of_lists_to_df(column: pd.Series, prefix="", prefix_sep=""):
    """
    input:

     index    groups
        0     ['a','b','c']
        1     ['c']
        2     ['b','c','e']
        3     ['a','c']
        4     ['b','e']

    output:

    index   a   b   c   e
        0   1   1   1   0
        1   0   0   1   0
        2   0   1   1   1
        3   1   0   1   0
        4   0   1   0   1
    """

    return pd.get_dummies(column.apply(pd.Series), prefix=prefix, prefix_sep=prefix_sep).sum(level=0, axis=1)
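A usage sketch for the helper above. Note that sum(level=..., axis=1) was removed in pandas 2.0, so this runs as written only on older versions; df.T.groupby(level=0).sum().T is the modern spelling:

import pandas as pd

groups = pd.Series([["a", "b", "c"], ["c"], ["b", "c", "e"], ["a", "c"], ["b", "e"]])
print(convert_series_of_lists_to_df(groups))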
Example #7
def getPointArray(conshpfn):
    conShp = shapefile.Reader(conshpfn)
    conShapes = conShp.shapes()
    conShapeArray = []
    for conShape in conShapes:
        numOfShapePoints = len(conShape.points)
        conShapePartArray = copy.deepcopy(conShape.parts)
        conShapePartArray.append(numOfShapePoints)
        partPointsArray = []
        for partIndex in range(len(conShape.parts)):
            partPointsArray.append(conShape.points[conShapePartArray[partIndex] : conShapePartArray[partIndex + 1]])
        partPointsSeries = Series(partPointsArray)
        numOfPartPointsSeries = partPointsSeries.apply(len)
        numOfPartPointsSeries = numOfPartPointsSeries.rank(method="first")
        rankDic = {}
        for i, numOfPartPointsSeriesItem in enumerate(numOfPartPointsSeries):
            rankDic[numOfPartPointsSeriesItem] = partPointsSeries[i]
        rankDicKeys = sorted(rankDic.keys(), reverse=True)
        sortedPartPointsArray = []
        for rankDicKey in rankDicKeys:
            sortedPartPointsArray.append(rankDic[rankDicKey])
        conShapeArray.append(sortedPartPointsArray)
    return conShapeArray
Example #8
def cortical_thickness(
    xfms: pd.Series,  # nlin avg -> subject XfmHandler (iirc)...
    atlas: MincAtom,  # nlin avg
    label_mapping: FileAtom,
    atlas_fwhm: float,
    thickness_fwhm: float,
):

    try:
        import vtk
    except ImportError:
        warnings.warn("couldn't `import vtk`, without which `decimate.py` is unable to run ...")
        raise

    s = Stages()

    # generate thickness maps for the average:
    left_grid, right_grid = [
        s.defer(
            make_laplace_grid(input_labels=atlas.labels, label_mapping=label_mapping, binary_closing=True, side=side)
        )
        for side in (Side.left, Side.right)
    ]

    atlas_left_thickness, atlas_right_thickness = [
        s.defer(
            decimate(
                s.defer(
                    minclaplace(input_grid=grid, extra_args=["--create-surface-range", "0", "10"])
                ).surface,  # enclose entire cortex
                reduction=0.8,  # FIXME: magic number ... implement a way to specify a vertex count rather than a fraction?
                smoothing_method=Smoothing.laplace,
            )
        )
        for grid in (left_grid, right_grid)
    ]

    # as per comment in MICe_thickness, blur atlas instead of transformed object files ... ?
    # (maybe this workaround is now obsolete)
    blurred_atlas = s.defer(mincblur(img=atlas, fwhm=atlas_fwhm)).img

    # TODO rename this dataframe
    resampled = (
        pd.DataFrame(
            {
                "xfm": xfms,
                # resample the atlas files to each subject:
                "blurred_atlas_grid_resampled": xfms.apply(
                    lambda xfm: s.defer(mincresample_new(img=blurred_atlas, xfm=xfm.xfm, like=xfm.target))
                ),
                "atlas_left_resampled": xfms.apply(
                    lambda xfm: s.defer(transform_objects(input_obj=atlas_left_thickness, xfm=xfm.xfm))
                ),
                "atlas_right_resampled": xfms.apply(
                    lambda xfm: s.defer(transform_objects(input_obj=atlas_right_thickness, xfm=xfm.xfm))
                ),
            }
        )
        .assign(
            left_grid=lambda df: df.xfm.map(
                lambda xfm: s.defer(
                    make_laplace_grid(
                        input_labels=xfm.target, label_mapping=label_mapping, binary_closing=True, side=Side.left
                    )
                )
            ),
            right_grid=lambda df: df.xfm.map(
                lambda xfm: s.defer(
                    make_laplace_grid(
                        input_labels=xfm.target, label_mapping=label_mapping, binary_closing=True, side=Side.right
                    )
                )
            ),
        )
        .assign(
            left_thickness=lambda df: df.apply(
                axis=1,
                func=lambda row: s.defer(
                    minclaplace(input_grid=row.left_grid, solution_vertices=row.atlas_left_resampled)
                ),
            ),
            right_thickness=lambda df: df.apply(
                axis=1,
                func=lambda row: s.defer(
                    minclaplace(input_grid=row.right_grid, solution_vertices=row.atlas_right_resampled)
                ),
            ),
        )
        .assign(
            smooth_left_fwhm=lambda df: df.apply(
                axis=1,
                func=lambda row: s.defer(
                    diffuse(
                        obj_file=row.atlas_left_resampled,
                        input_signal=row.left_thickness.solved,
                        kernel=thickness_fwhm,
                        iterations=1000,
                    )
                ),
            ),
            smooth_right_fwhm=lambda df: df.apply(
                axis=1,
                func=lambda row: s.defer(
                    diffuse(
                        obj_file=row.atlas_right_resampled,
                        input_signal=row.right_thickness.solved,
                        kernel=thickness_fwhm,
                        iterations=1000,
                    )
                ),
            ),
        )
    )
    return Result(stages=s, output=resampled)
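The apply pattern worth noting here: Series.apply and DataFrame.apply(axis=1) schedule one pipeline stage per subject, and the returned handles are collected into new DataFrame columns. Stripped of the pydpiper machinery, the shape of the idiom is roughly this (stand-in defer function and data, purely illustrative):

import pandas as pd

def defer(stage):
    # stand-in for Stages.defer: register a stage and return a handle to its output
    return "handle({})".format(stage)

xfms = pd.Series(["xfm_subj1", "xfm_subj2"])
df = (
    pd.DataFrame({"xfm": xfms})
    .assign(resampled=lambda d: d.xfm.apply(lambda x: defer("resample " + x)))
    .assign(thickness=lambda d: d.apply(axis=1, func=lambda row: defer("laplace " + row.resampled)))
)
print(df)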
Example #9
    def test_apply_args(self):
        s = Series(["foo,bar"])

        result = s.apply(str.split, args=(",",))
        self.assertEqual(result[0], ["foo", "bar"])
        tm.assertIsInstance(result[0], list)
Example #10
    def test_apply_dont_convert_dtype(self):
        s = Series(np.random.randn(10))

        f = lambda x: x if x > 0 else np.nan
        result = s.apply(f, convert_dtype=False)
        self.assertEqual(result.dtype, object)
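convert_dtype=False keeps the result as object dtype even when the values could be down-cast to float (the argument was deprecated in pandas 2.1). A tiny demonstration:

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(10))
f = lambda x: x if x > 0 else np.nan
print(s.apply(f, convert_dtype=False).dtype)  # object
print(s.apply(f).dtype)                       # float64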
Example #11

# <markdowncell>

# **A9**:
# <pre>
# 0    0
# 1    1
# 2    2
# 3    3
# 4    4
# </pre>

# <codecell>

s1.apply(lambda k: 2 * k).sum()

# <markdowncell>

# **Q10**: What is
#
# ```Python
# s1.apply(lambda k: 2*k).sum()
# ```

# <markdowncell>

# **A10**:
# <pre>
# 10
# </pre>
Example #12
    result = requests.get(url)
    if result.status_code == 200:
        # print('Request successful')
        return BeautifulSoup(result.text, "html.parser")
    else:
        print("Request failed", url)
        return None


result = requests.post(link, data=pageLoad)
soupMed = BeautifulSoup(result.text, "html.parser")
# soupMed = BeautifulSoup(result.text)
# print soupMed

# print soupMed.find("a", {"class": "standart"})
# print soupMed.find("tr td + a")
names = [x.text for x in soupMed.find_all(class_="standart")]

# print names
# names = [ x.text for x in soupMed.find_all("a", {'clasx_': 'standart'})
names = Series(names)
print(names.str.strip())



"""
names.str.strip()
names.apply(lambda x : regex.findall(x))
"""
lower.apply(identity)

# <codecell>

# show that identity yields the same Series -- first on an element-by-element basis

lower.apply(identity) == lower

# <codecell>

# Check that match happens for every element in the Series using numpy.all
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.all.html

np.all(lower.apply(identity) == lower)

# <headingcell level=2>

# Let's use `lambda`

# <markdowncell>

# Sometimes it's convenient to write functions using `lambda`, especially short functions that do a simple transformation of the parameters.  Only functions whose body is a single expression can be rewritten with `lambda`.

# <codecell>


def add_preface(s):
    return "letter " + s


lower.apply(add_preface)

# <codecell>

# rewrite with lambda

lower.apply(lambda s: "letter " + s)
Example #14
}

url = "http://base-donnees-publique.medicaments.gouv.fr/index.php#result"

result = requests.post(url, data=payload)
soup = BeautifulSoup(result.text, "html.parser")

names = [x.text for x in soup.find_all(class_="standart")]
# grab the text of each search-result entry

names = Series(names)
# the str accessor works on the values as strings; strip() removes surrounding whitespace
names = names.str.strip()

# regular expression to find the dosage digits
regex_dosage = re.compile(r"\d+")

# regular expression to find the unit (microgrammes, µg, ...)
regex_unite = re.compile(r"(microgrammes|µg|grammes|gL)")

# regular expression to find the form ("comprimé sécable")
regex_form = re.compile(r"comprim\xe9 s\xe9cable")

a = names.apply(lambda x: regex_dosage.findall(x))
b = names.apply(lambda x: regex_unite.findall(x))
c = names.apply(lambda x: regex_form.findall(x))

d = {"dosage": a, "unite": b, "forme": c}
e = DataFrame(d)
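A quick check of what these regexes extract, reusing regex_dosage and regex_unite from above on a made-up drug name (not scraped from the page):

sample = Series(["LEVOTHYROX 100 microgrammes, comprimé sécable"])
print(sample.apply(regex_dosage.findall))  # element 0 -> ['100']
print(sample.apply(regex_unite.findall))   # element 0 -> ['microgrammes']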
Example #15
    "affNumero": 0,
    "isAlphabet": 0,
    "inClauseSubst": 0,
    "nomSubstances": "",
    "typeRecherche": 0,
    "choixRecherche": "medicament",
    "txtCaracteres": "levothyroxine",
    "btnMedic.x": 9,
    "btnMedic.y": 15,
    "btnMedic": "Rechercher",
    "radLibelle": 2,
    "txtCaracteresSub": "",
    "radLibelleSub": 4,
}

raw_data = requests.post("http://base-donnees-publique.medicaments.gouv.fr/index.php#result", data=payload).text
html = BeautifulSoup(raw_data, "html.parser")

drugss = html.findAll("a", class_="standart")

drugs = [drug.text for drug in drugss]

names = Series(drugs)

names = names.str.strip()

regex_dosage = re.compile(r"\d+")
regex_units = re.compile(r"(microgrammes|µg|grammes)")

# collect the extracted fields into a DataFrame: assigning string labels like
# names["dosage"] to a Series would append new elements, not create columns
extracted = DataFrame(
    {
        "name": names,
        "dosage": names.apply(lambda x: regex_dosage.findall(x)),
        "units": names.apply(lambda x: regex_units.findall(x)),
    }
)